diff options
220 files changed, 5387 insertions, 3500 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 651d5237c155..4471a416c274 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -60,7 +60,6 @@ ata *); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); - long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); locking rules: @@ -88,7 +87,6 @@ getxattr: no listxattr: no removexattr: yes truncate_range: yes -fallocate: no fiemap: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. @@ -437,6 +435,7 @@ prototypes: ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int (*setlease)(struct file *, long, struct file_lock **); + long (*fallocate)(struct file *, int, loff_t, loff_t); }; locking rules: diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 943fe6930f77..fc95ee1bcf6f 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -68,6 +68,9 @@ config GENERIC_IOMAP bool default n +config GENERIC_HARDIRQS_NO__DO_IRQ + def_bool y + config GENERIC_HARDIRQS bool default y diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index eda9b909aa05..56ff96501350 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -37,8 +37,9 @@ */ extern inline void __set_hae(unsigned long new_hae) { - unsigned long flags; - local_irq_save(flags); + unsigned long flags = swpipl(IPL_MAX); + + barrier(); alpha_mv.hae_cache = new_hae; *alpha_mv.hae_register = new_hae; @@ -46,7 +47,8 @@ extern inline void __set_hae(unsigned long new_hae) /* Re-read to make sure it was written. */ new_hae = *alpha_mv.hae_register; - local_irq_restore(flags); + setipl(flags); + barrier(); } extern inline void set_hae(unsigned long new_hae) diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index 1ee9b5b629b8..9bb7b858ed23 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -3,8 +3,8 @@ # extra-y := head.o vmlinux.lds -EXTRA_AFLAGS := $(KBUILD_CFLAGS) -EXTRA_CFLAGS := -Werror -Wno-sign-compare +asflags-y := $(KBUILD_CFLAGS) +ccflags-y := -Werror -Wno-sign-compare obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index fe912984d9b1..9ab234f48dd8 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -44,10 +44,11 @@ static char irq_user_affinity[NR_IRQS]; int irq_select_affinity(unsigned int irq) { + struct irq_desc *desc = irq_to_desc[irq]; static int last_cpu; int cpu = last_cpu + 1; - if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq]) + if (!desc || !get_irq_desc_chip(desc)->set_affinity || irq_user_affinity[irq]) return 1; while (!cpu_possible(cpu) || @@ -55,8 +56,8 @@ int irq_select_affinity(unsigned int irq) cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); last_cpu = cpu; - cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); - irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu)); + cpumask_copy(desc->affinity, cpumask_of(cpu)); + get_irq_desc_chip(desc)->set_affinity(irq, cpumask_of(cpu)); return 0; } #endif /* CONFIG_SMP */ @@ -67,6 +68,7 @@ show_interrupts(struct seq_file *p, void *v) int j; int irq = *(loff_t *) v; struct irqaction * action; + struct irq_desc *desc; unsigned long flags; #ifdef CONFIG_SMP @@ -79,8 +81,13 @@ show_interrupts(struct seq_file *p, void *v) #endif if (irq < ACTUAL_NR_IRQS) { - raw_spin_lock_irqsave(&irq_desc[irq].lock, flags); - action = irq_desc[irq].action; + desc = irq_to_desc(irq); + + if (!desc) + return 0; + + raw_spin_lock_irqsave(&desc->lock, flags); + action = desc->action; if (!action) goto unlock; seq_printf(p, "%3d: ", irq); @@ -90,7 +97,7 @@ show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(irq, j)); #endif - seq_printf(p, " %14s", irq_desc[irq].chip->name); + seq_printf(p, " %14s", get_irq_desc_chip(desc)->name); seq_printf(p, " %c%s", (action->flags & IRQF_DISABLED)?'+':' ', action->name); @@ -103,7 +110,7 @@ show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - raw_spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + raw_spin_unlock_irqrestore(&desc->lock, flags); } else if (irq == ACTUAL_NR_IRQS) { #ifdef CONFIG_SMP seq_puts(p, "IPI: "); @@ -142,8 +149,10 @@ handle_irq(int irq) * handled by some other CPU. (or is disabled) */ static unsigned int illegal_count=0; + struct irq_desc *desc = irq_to_desc(irq); - if ((unsigned) irq > ACTUAL_NR_IRQS && illegal_count < MAX_ILLEGAL_IRQS ) { + if (!desc || ((unsigned) irq > ACTUAL_NR_IRQS && + illegal_count < MAX_ILLEGAL_IRQS)) { irq_err_count++; illegal_count++; printk(KERN_CRIT "device_interrupt: invalid interrupt %d\n", @@ -151,14 +160,14 @@ handle_irq(int irq) return; } - irq_enter(); /* - * __do_IRQ() must be called with IPL_MAX. Note that we do not + * From here we must proceed with IPL_MAX. Note that we do not * explicitly enable interrupts afterwards - some MILO PALcode * (namely LX164 one) seems to have severe problems with RTI * at IPL 0. */ local_irq_disable(); - __do_IRQ(irq); + irq_enter(); + generic_handle_irq_desc(irq, desc); irq_exit(); } diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 4c8bb374eb0a..2d0679b60939 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -219,31 +219,23 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr, * processed by PALcode, and comes in via entInt vector 1. */ -static void rtc_enable_disable(unsigned int irq) { } -static unsigned int rtc_startup(unsigned int irq) { return 0; } - struct irqaction timer_irqaction = { .handler = timer_interrupt, .flags = IRQF_DISABLED, .name = "timer", }; -static struct irq_chip rtc_irq_type = { - .name = "RTC", - .startup = rtc_startup, - .shutdown = rtc_enable_disable, - .enable = rtc_enable_disable, - .disable = rtc_enable_disable, - .ack = rtc_enable_disable, - .end = rtc_enable_disable, -}; - void __init init_rtc_irq(void) { - irq_desc[RTC_IRQ].status = IRQ_DISABLED; - irq_desc[RTC_IRQ].chip = &rtc_irq_type; - setup_irq(RTC_IRQ, &timer_irqaction); + struct irq_desc *desc = irq_to_desc(RTC_IRQ); + + if (desc) { + desc->status |= IRQ_DISABLED; + set_irq_chip_and_handler_name(RTC_IRQ, &no_irq_chip, + handle_simple_irq, "RTC"); + setup_irq(RTC_IRQ, &timer_irqaction); + } } /* Dummy irqactions. */ diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c index 83a9ac280890..956ea0ed1694 100644 --- a/arch/alpha/kernel/irq_i8259.c +++ b/arch/alpha/kernel/irq_i8259.c @@ -69,28 +69,11 @@ i8259a_mask_and_ack_irq(unsigned int irq) spin_unlock(&i8259_irq_lock); } -unsigned int -i8259a_startup_irq(unsigned int irq) -{ - i8259a_enable_irq(irq); - return 0; /* never anything pending */ -} - -void -i8259a_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - i8259a_enable_irq(irq); -} - struct irq_chip i8259a_irq_type = { .name = "XT-PIC", - .startup = i8259a_startup_irq, - .shutdown = i8259a_disable_irq, - .enable = i8259a_enable_irq, - .disable = i8259a_disable_irq, - .ack = i8259a_mask_and_ack_irq, - .end = i8259a_end_irq, + .unmask = i8259a_enable_irq, + .mask = i8259a_disable_irq, + .mask_ack = i8259a_mask_and_ack_irq, }; void __init @@ -107,8 +90,7 @@ init_i8259a_irqs(void) outb(0xff, 0xA1); /* mask all of 8259A-2 */ for (i = 0; i < 16; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].chip = &i8259a_irq_type; + set_irq_chip_and_handler(i, &i8259a_irq_type, handle_level_irq); } setup_irq(2, &cascade); diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c index 989ce46a0cf3..2863458c853e 100644 --- a/arch/alpha/kernel/irq_pyxis.c +++ b/arch/alpha/kernel/irq_pyxis.c @@ -40,20 +40,6 @@ pyxis_disable_irq(unsigned int irq) pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); } -static unsigned int -pyxis_startup_irq(unsigned int irq) -{ - pyxis_enable_irq(irq); - return 0; -} - -static void -pyxis_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - pyxis_enable_irq(irq); -} - static void pyxis_mask_and_ack_irq(unsigned int irq) { @@ -72,12 +58,9 @@ pyxis_mask_and_ack_irq(unsigned int irq) static struct irq_chip pyxis_irq_type = { .name = "PYXIS", - .startup = pyxis_startup_irq, - .shutdown = pyxis_disable_irq, - .enable = pyxis_enable_irq, - .disable = pyxis_disable_irq, - .ack = pyxis_mask_and_ack_irq, - .end = pyxis_end_irq, + .mask_ack = pyxis_mask_and_ack_irq, + .mask = pyxis_disable_irq, + .unmask = pyxis_enable_irq, }; void @@ -119,8 +102,8 @@ init_pyxis_irqs(unsigned long ignore_mask) for (i = 16; i < 48; ++i) { if ((ignore_mask >> i) & 1) continue; - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &pyxis_irq_type; + set_irq_chip_and_handler(i, &pyxis_irq_type, handle_level_irq); + irq_to_desc(i)->status |= IRQ_LEVEL; } setup_irq(16+7, &isa_cascade_irqaction); diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c index d63e93e1e8bf..0e57e828b413 100644 --- a/arch/alpha/kernel/irq_srm.c +++ b/arch/alpha/kernel/irq_srm.c @@ -33,29 +33,12 @@ srm_disable_irq(unsigned int irq) spin_unlock(&srm_irq_lock); } -static unsigned int -srm_startup_irq(unsigned int irq) -{ - srm_enable_irq(irq); - return 0; -} - -static void -srm_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - srm_enable_irq(irq); -} - /* Handle interrupts from the SRM, assuming no additional weirdness. */ static struct irq_chip srm_irq_type = { .name = "SRM", - .startup = srm_startup_irq, - .shutdown = srm_disable_irq, - .enable = srm_enable_irq, - .disable = srm_disable_irq, - .ack = srm_disable_irq, - .end = srm_end_irq, + .unmask = srm_enable_irq, + .mask = srm_disable_irq, + .mask_ack = srm_disable_irq, }; void __init @@ -68,8 +51,8 @@ init_srm_irqs(long max, unsigned long ignore_mask) for (i = 16; i < max; ++i) { if (i < 64 && ((ignore_mask >> i) & 1)) continue; - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &srm_irq_type; + set_irq_chip_and_handler(i, &srm_irq_type, handle_level_irq); + irq_to_desc(i)->status |= IRQ_LEVEL; } } diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 547e8b84b2f7..fe698b5045e9 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -951,9 +951,6 @@ SYSCALL_DEFINE2(osf_utimes, const char __user *, filename, return do_utimes(AT_FDCWD, filename, tvs ? tv : NULL, 0); } -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) - SYSCALL_DEFINE5(osf_select, int, n, fd_set __user *, inp, fd_set __user *, outp, fd_set __user *, exp, struct timeval32 __user *, tvp) { diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index 20a30b8b9655..7bef61768236 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -65,13 +65,6 @@ alcor_mask_and_ack_irq(unsigned int irq) *(vuip)GRU_INT_CLEAR = 0; mb(); } -static unsigned int -alcor_startup_irq(unsigned int irq) -{ - alcor_enable_irq(irq); - return 0; -} - static void alcor_isa_mask_and_ack_irq(unsigned int irq) { @@ -82,21 +75,11 @@ alcor_isa_mask_and_ack_irq(unsigned int irq) *(vuip)GRU_INT_CLEAR = 0; mb(); } -static void -alcor_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - alcor_enable_irq(irq); -} - static struct irq_chip alcor_irq_type = { .name = "ALCOR", - .startup = alcor_startup_irq, - .shutdown = alcor_disable_irq, - .enable = alcor_enable_irq, - .disable = alcor_disable_irq, - .ack = alcor_mask_and_ack_irq, - .end = alcor_end_irq, + .unmask = alcor_enable_irq, + .mask = alcor_disable_irq, + .mask_ack = alcor_mask_and_ack_irq, }; static void @@ -142,8 +125,8 @@ alcor_init_irq(void) on while IRQ probing. */ if (i >= 16+20 && i <= 16+30) continue; - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &alcor_irq_type; + set_irq_chip_and_handler(i, &alcor_irq_type, handle_level_irq); + irq_to_desc(i)->status |= IRQ_LEVEL; } i8259a_irq_type.ack = alcor_isa_mask_and_ack_irq; diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index 14c8898d19ec..b0c916493aea 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -57,28 +57,11 @@ cabriolet_disable_irq(unsigned int irq) cabriolet_update_irq_hw(irq, cached_irq_mask |= 1UL << irq); } -static unsigned int -cabriolet_startup_irq(unsigned int irq) -{ - cabriolet_enable_irq(irq); - return 0; /* never anything pending */ -} - -static void -cabriolet_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - cabriolet_enable_irq(irq); -} - static struct irq_chip cabriolet_irq_type = { .name = "CABRIOLET", - .startup = cabriolet_startup_irq, - .shutdown = cabriolet_disable_irq, - .enable = cabriolet_enable_irq, - .disable = cabriolet_disable_irq, - .ack = cabriolet_disable_irq, - .end = cabriolet_end_irq, + .unmask = cabriolet_enable_irq, + .mask = cabriolet_disable_irq, + .mask_ack = cabriolet_disable_irq, }; static void @@ -122,8 +105,9 @@ common_init_irq(void (*srm_dev_int)(unsigned long v)) outb(0xff, 0x806); for (i = 16; i < 35; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &cabriolet_irq_type; + set_irq_chip_and_handler(i, &cabriolet_irq_type, + handle_level_irq); + irq_to_desc(i)->status |= IRQ_LEVEL; } } diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 4026502ab707..edad5f759ccd 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -115,20 +115,6 @@ dp264_disable_irq(unsigned int irq) spin_unlock(&dp264_irq_lock); } -static unsigned int -dp264_startup_irq(unsigned int irq) -{ - dp264_enable_irq(irq); - return 0; /* never anything pending */ -} - -static void -dp264_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - dp264_enable_irq(irq); -} - static void clipper_enable_irq(unsigned int irq) { @@ -147,20 +133,6 @@ clipper_disable_irq(unsigned int irq) spin_unlock(&dp264_irq_lock); } -static unsigned int -clipper_startup_irq(unsigned int irq) -{ - clipper_enable_irq(irq); - return 0; /* never anything pending */ -} - -static void -clipper_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - clipper_enable_irq(irq); -} - static void cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) { @@ -200,23 +172,17 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) static struct irq_chip dp264_irq_type = { .name = "DP264", - .startup = dp264_startup_irq, - .shutdown = dp264_disable_irq, - .enable = dp264_enable_irq, - .disable = dp264_disable_irq, - .ack = dp264_disable_irq, - .end = dp264_end_irq, + .unmask = dp264_enable_irq, + .mask = dp264_disable_irq, + .mask_ack = dp264_disable_irq, .set_affinity = dp264_set_affinity, }; static struct irq_chip clipper_irq_type = { .name = "CLIPPER", - .startup = clipper_startup_irq, - .shutdown = clipper_disable_irq, - .enable = clipper_enable_irq, - .disable = clipper_disable_irq, - .ack = clipper_disable_irq, - .end = clipper_end_irq, + .unmask = clipper_enable_irq, + .mask = clipper_disable_irq, + .mask_ack = clipper_disable_irq, .set_affinity = clipper_set_affinity, }; @@ -302,8 +268,8 @@ init_tsunami_irqs(struct irq_chip * ops, int imin, int imax) { long i; for (i = imin; i <= imax; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = ops; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, ops, handle_level_irq); } } diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c index df2090ce5e7f..ae5f29d127b0 100644 --- a/arch/alpha/kernel/sys_eb64p.c +++ b/arch/alpha/kernel/sys_eb64p.c @@ -55,28 +55,11 @@ eb64p_disable_irq(unsigned int irq) eb64p_update_irq_hw(irq, cached_irq_mask |= 1 << irq); } -static unsigned int -eb64p_startup_irq(unsigned int irq) -{ - eb64p_enable_irq(irq); - return 0; /* never anything pending */ -} - -static void -eb64p_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - eb64p_enable_irq(irq); -} - static struct irq_chip eb64p_irq_type = { .name = "EB64P", - .startup = eb64p_startup_irq, - .shutdown = eb64p_disable_irq, - .enable = eb64p_enable_irq, - .disable = eb64p_disable_irq, - .ack = eb64p_disable_irq, - .end = eb64p_end_irq, + .unmask = eb64p_enable_irq, + .mask = eb64p_disable_irq, + .mask_ack = eb64p_disable_irq, }; static void @@ -135,8 +118,8 @@ eb64p_init_irq(void) init_i8259a_irqs(); for (i = 16; i < 32; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &eb64p_irq_type; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, &eb64p_irq_type, handle_level_irq); } common_init_isa_dma(); diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index 3ca1dbcf4044..1121bc5c6c6c 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -66,28 +66,11 @@ eiger_disable_irq(unsigned int irq) eiger_update_irq_hw(irq, mask); } -static unsigned int -eiger_startup_irq(unsigned int irq) -{ - eiger_enable_irq(irq); - return 0; /* never anything pending */ -} - -static void -eiger_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - eiger_enable_irq(irq); -} - static struct irq_chip eiger_irq_type = { .name = "EIGER", - .startup = eiger_startup_irq, - .shutdown = eiger_disable_irq, - .enable = eiger_enable_irq, - .disable = eiger_disable_irq, - .ack = eiger_disable_irq, - .end = eiger_end_irq, + .unmask = eiger_enable_irq, + .mask = eiger_disable_irq, + .mask_ack = eiger_disable_irq, }; static void @@ -153,8 +136,8 @@ eiger_init_irq(void) init_i8259a_irqs(); for (i = 16; i < 128; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &eiger_irq_type; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, &eiger_irq_type, handle_level_irq); } } diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index 7a7ae36fff91..34f55e03d331 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -62,30 +62,6 @@ * world. */ -static unsigned int -jensen_local_startup(unsigned int irq) -{ - /* the parport is really hw IRQ 1, silly Jensen. */ - if (irq == 7) - i8259a_startup_irq(1); - else - /* - * For all true local interrupts, set the flag that prevents - * the IPL from being dropped during handler processing. - */ - if (irq_desc[irq].action) - irq_desc[irq].action->flags |= IRQF_DISABLED; - return 0; -} - -static void -jensen_local_shutdown(unsigned int irq) -{ - /* the parport is really hw IRQ 1, silly Jensen. */ - if (irq == 7) - i8259a_disable_irq(1); -} - static void jensen_local_enable(unsigned int irq) { @@ -103,29 +79,18 @@ jensen_local_disable(unsigned int irq) } static void -jensen_local_ack(unsigned int irq) +jensen_local_mask_ack(unsigned int irq) { /* the parport is really hw IRQ 1, silly Jensen. */ if (irq == 7) i8259a_mask_and_ack_irq(1); } -static void -jensen_local_end(unsigned int irq) -{ - /* the parport is really hw IRQ 1, silly Jensen. */ - if (irq == 7) - i8259a_end_irq(1); -} - static struct irq_chip jensen_local_irq_type = { .name = "LOCAL", - .startup = jensen_local_startup, - .shutdown = jensen_local_shutdown, - .enable = jensen_local_enable, - .disable = jensen_local_disable, - .ack = jensen_local_ack, - .end = jensen_local_end, + .unmask = jensen_local_enable, + .mask = jensen_local_disable, + .mask_ack = jensen_local_mask_ack, }; static void @@ -158,7 +123,7 @@ jensen_device_interrupt(unsigned long vector) } /* If there is no handler yet... */ - if (irq_desc[irq].action == NULL) { + if (!irq_has_action(irq)) { /* If it is a local interrupt that cannot be masked... */ if (vector >= 0x900) { @@ -206,11 +171,11 @@ jensen_init_irq(void) { init_i8259a_irqs(); - irq_desc[1].chip = &jensen_local_irq_type; - irq_desc[4].chip = &jensen_local_irq_type; - irq_desc[3].chip = &jensen_local_irq_type; - irq_desc[7].chip = &jensen_local_irq_type; - irq_desc[9].chip = &jensen_local_irq_type; + set_irq_chip_and_handler(1, &jensen_local_irq_type, handle_level_irq); + set_irq_chip_and_handler(4, &jensen_local_irq_type, handle_level_irq); + set_irq_chip_and_handler(3, &jensen_local_irq_type, handle_level_irq); + set_irq_chip_and_handler(7, &jensen_local_irq_type, handle_level_irq); + set_irq_chip_and_handler(9, &jensen_local_irq_type, handle_level_irq); common_init_isa_dma(); } diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index 0bb3b5c4f693..2bfc9f1b1ddc 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -143,20 +143,6 @@ io7_disable_irq(unsigned int irq) spin_unlock(&io7->irq_lock); } -static unsigned int -io7_startup_irq(unsigned int irq) -{ - io7_enable_irq(irq); - return 0; /* never anything pending */ -} - -static void -io7_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - io7_enable_irq(irq); -} - static void marvel_irq_noop(unsigned int irq) { @@ -171,32 +157,22 @@ marvel_irq_noop_return(unsigned int irq) static struct irq_chip marvel_legacy_irq_type = { .name = "LEGACY", - .startup = marvel_irq_noop_return, - .shutdown = marvel_irq_noop, - .enable = marvel_irq_noop, - .disable = marvel_irq_noop, - .ack = marvel_irq_noop, - .end = marvel_irq_noop, + .mask = marvel_irq_noop, + .unmask = marvel_irq_noop, }; static struct irq_chip io7_lsi_irq_type = { .name = "LSI", - .startup = io7_startup_irq, - .shutdown = io7_disable_irq, - .enable = io7_enable_irq, - .disable = io7_disable_irq, - .ack = io7_disable_irq, - .end = io7_end_irq, + .unmask = io7_enable_irq, + .mask = io7_disable_irq, + .mask_ack = io7_disable_irq, }; static struct irq_chip io7_msi_irq_type = { .name = "MSI", - .startup = io7_startup_irq, - .shutdown = io7_disable_irq, - .enable = io7_enable_irq, - .disable = io7_disable_irq, + .unmask = io7_enable_irq, + .mask = io7_disable_irq, .ack = marvel_irq_noop, - .end = io7_end_irq, }; static void @@ -304,8 +280,8 @@ init_io7_irqs(struct io7 *io7, /* Set up the lsi irqs. */ for (i = 0; i < 128; ++i) { - irq_desc[base + i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[base + i].chip = lsi_ops; + irq_to_desc(base + i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(base + i, lsi_ops, handle_level_irq); } /* Disable the implemented irqs in hardware. */ @@ -318,8 +294,8 @@ init_io7_irqs(struct io7 *io7, /* Set up the msi irqs. */ for (i = 128; i < (128 + 512); ++i) { - irq_desc[base + i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[base + i].chip = msi_ops; + irq_to_desc(base + i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(base + i, msi_ops, handle_level_irq); } for (i = 0; i < 16; ++i) @@ -336,8 +312,8 @@ marvel_init_irq(void) /* Reserve the legacy irqs. */ for (i = 0; i < 16; ++i) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].chip = &marvel_legacy_irq_type; + set_irq_chip_and_handler(i, &marvel_legacy_irq_type, + handle_level_irq); } /* Init the io7 irqs. */ diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c index ee8865169811..bcc1639e8efb 100644 --- a/arch/alpha/kernel/sys_mikasa.c +++ b/arch/alpha/kernel/sys_mikasa.c @@ -54,28 +54,11 @@ mikasa_disable_irq(unsigned int irq) mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (irq - 16))); } -static unsigned int -mikasa_startup_irq(unsigned int irq) -{ - mikasa_enable_irq(irq); - return 0; -} - -static void -mikasa_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - mikasa_enable_irq(irq); -} - static struct irq_chip mikasa_irq_type = { .name = "MIKASA", - .startup = mikasa_startup_irq, - .shutdown = mikasa_disable_irq, - .enable = mikasa_enable_irq, - .disable = mikasa_disable_irq, - .ack = mikasa_disable_irq, - .end = mikasa_end_irq, + .unmask = mikasa_enable_irq, + .mask = mikasa_disable_irq, + .mask_ack = mikasa_disable_irq, }; static void @@ -115,8 +98,8 @@ mikasa_init_irq(void) mikasa_update_irq_hw(0); for (i = 16; i < 32; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &mikasa_irq_type; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, &mikasa_irq_type, handle_level_irq); } init_i8259a_irqs(); diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c index 86503fe73a88..e88f4ae1260e 100644 --- a/arch/alpha/kernel/sys_noritake.c +++ b/arch/alpha/kernel/sys_noritake.c @@ -59,28 +59,11 @@ noritake_disable_irq(unsigned int irq) noritake_update_irq_hw(irq, cached_irq_mask &= ~(1 << (irq - 16))); } -static unsigned int -noritake_startup_irq(unsigned int irq) -{ - noritake_enable_irq(irq); - return 0; -} - -static void -noritake_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - noritake_enable_irq(irq); -} - static struct irq_chip noritake_irq_type = { .name = "NORITAKE", - .startup = noritake_startup_irq, - .shutdown = noritake_disable_irq, - .enable = noritake_enable_irq, - .disable = noritake_disable_irq, - .ack = noritake_disable_irq, - .end = noritake_end_irq, + .unmask = noritake_enable_irq, + .mask = noritake_disable_irq, + .mask_ack = noritake_disable_irq, }; static void @@ -144,8 +127,8 @@ noritake_init_irq(void) outw(0, 0x54c); for (i = 16; i < 48; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &noritake_irq_type; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, &noritake_irq_type, handle_level_irq); } init_i8259a_irqs(); diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c index 26c322bf89ee..6a51364dd1cc 100644 --- a/arch/alpha/kernel/sys_rawhide.c +++ b/arch/alpha/kernel/sys_rawhide.c @@ -121,28 +121,11 @@ rawhide_mask_and_ack_irq(unsigned int irq) spin_unlock(&rawhide_irq_lock); } -static unsigned int -rawhide_startup_irq(unsigned int irq) -{ - rawhide_enable_irq(irq); - return 0; -} - -static void -rawhide_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - rawhide_enable_irq(irq); -} - static struct irq_chip rawhide_irq_type = { .name = "RAWHIDE", - .startup = rawhide_startup_irq, - .shutdown = rawhide_disable_irq, - .enable = rawhide_enable_irq, - .disable = rawhide_disable_irq, - .ack = rawhide_mask_and_ack_irq, - .end = rawhide_end_irq, + .unmask = rawhide_enable_irq, + .mask = rawhide_disable_irq, + .mask_ack = rawhide_mask_and_ack_irq, }; static void @@ -194,8 +177,8 @@ rawhide_init_irq(void) } for (i = 16; i < 128; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &rawhide_irq_type; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, &rawhide_irq_type, handle_level_irq); } init_i8259a_irqs(); diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c index be161129eab9..89e7e37ec84c 100644 --- a/arch/alpha/kernel/sys_rx164.c +++ b/arch/alpha/kernel/sys_rx164.c @@ -58,28 +58,11 @@ rx164_disable_irq(unsigned int irq) rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); } -static unsigned int -rx164_startup_irq(unsigned int irq) -{ - rx164_enable_irq(irq); - return 0; -} - -static void -rx164_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - rx164_enable_irq(irq); -} - static struct irq_chip rx164_irq_type = { .name = "RX164", - .startup = rx164_startup_irq, - .shutdown = rx164_disable_irq, - .enable = rx164_enable_irq, - .disable = rx164_disable_irq, - .ack = rx164_disable_irq, - .end = rx164_end_irq, + .unmask = rx164_enable_irq, + .mask = rx164_disable_irq, + .mask_ack = rx164_disable_irq, }; static void @@ -116,8 +99,8 @@ rx164_init_irq(void) rx164_update_irq_hw(0); for (i = 16; i < 40; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &rx164_irq_type; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, &rx164_irq_type, handle_level_irq); } init_i8259a_irqs(); diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index b2abe27a23cf..5c4423d1b06c 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -474,20 +474,6 @@ sable_lynx_disable_irq(unsigned int irq) #endif } -static unsigned int -sable_lynx_startup_irq(unsigned int irq) -{ - sable_lynx_enable_irq(irq); - return 0; -} - -static void -sable_lynx_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - sable_lynx_enable_irq(irq); -} - static void sable_lynx_mask_and_ack_irq(unsigned int irq) { @@ -503,12 +489,9 @@ sable_lynx_mask_and_ack_irq(unsigned int irq) static struct irq_chip sable_lynx_irq_type = { .name = "SABLE/LYNX", - .startup = sable_lynx_startup_irq, - .shutdown = sable_lynx_disable_irq, - .enable = sable_lynx_enable_irq, - .disable = sable_lynx_disable_irq, - .ack = sable_lynx_mask_and_ack_irq, - .end = sable_lynx_end_irq, + .unmask = sable_lynx_enable_irq, + .mask = sable_lynx_disable_irq, + .mask_ack = sable_lynx_mask_and_ack_irq, }; static void @@ -535,8 +518,9 @@ sable_lynx_init_irq(int nr_of_irqs) long i; for (i = 0; i < nr_of_irqs; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &sable_lynx_irq_type; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, &sable_lynx_irq_type, + handle_level_irq); } common_init_isa_dma(); diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index 4da596b6adbb..f8a1e8a862fb 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -60,28 +60,11 @@ takara_disable_irq(unsigned int irq) takara_update_irq_hw(irq, mask); } -static unsigned int -takara_startup_irq(unsigned int irq) -{ - takara_enable_irq(irq); - return 0; /* never anything pending */ -} - -static void -takara_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - takara_enable_irq(irq); -} - static struct irq_chip takara_irq_type = { .name = "TAKARA", - .startup = takara_startup_irq, - .shutdown = takara_disable_irq, - .enable = takara_enable_irq, - .disable = takara_disable_irq, - .ack = takara_disable_irq, - .end = takara_end_irq, + .unmask = takara_enable_irq, + .mask = takara_disable_irq, + .mask_ack = takara_disable_irq, }; static void @@ -153,8 +136,8 @@ takara_init_irq(void) takara_update_irq_hw(i, -1); for (i = 16; i < 128; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = &takara_irq_type; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, &takara_irq_type, handle_level_irq); } common_init_isa_dma(); diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 9008d0f20c53..e02494bf5ef3 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -129,20 +129,6 @@ titan_disable_irq(unsigned int irq) spin_unlock(&titan_irq_lock); } -static unsigned int -titan_startup_irq(unsigned int irq) -{ - titan_enable_irq(irq); - return 0; /* never anything pending */ -} - -static void -titan_end_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - titan_enable_irq(irq); -} - static void titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) { @@ -189,20 +175,17 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax) { long i; for (i = imin; i <= imax; ++i) { - irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i].chip = ops; + irq_to_desc(i)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i, ops, handle_level_irq); } } static struct irq_chip titan_irq_type = { - .name = "TITAN", - .startup = titan_startup_irq, - .shutdown = titan_disable_irq, - .enable = titan_enable_irq, - .disable = titan_disable_irq, - .ack = titan_disable_irq, - .end = titan_end_irq, - .set_affinity = titan_set_irq_affinity, + .name = "TITAN", + .unmask = titan_enable_irq, + .mask = titan_disable_irq, + .mask_ack = titan_disable_irq, + .set_affinity = titan_set_irq_affinity, }; static irqreturn_t diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index 62fd972e18ef..eec52594d410 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -139,32 +139,11 @@ wildfire_mask_and_ack_irq(unsigned int irq) spin_unlock(&wildfire_irq_lock); } -static unsigned int -wildfire_startup_irq(unsigned int irq) -{ - wildfire_enable_irq(irq); - return 0; /* never anything pending */ -} - -static void -wildfire_end_irq(unsigned int irq) -{ -#if 0 - if (!irq_desc[irq].action) - printk("got irq %d\n", irq); -#endif - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - wildfire_enable_irq(irq); -} - static struct irq_chip wildfire_irq_type = { .name = "WILDFIRE", - .startup = wildfire_startup_irq, - .shutdown = wildfire_disable_irq, - .enable = wildfire_enable_irq, - .disable = wildfire_disable_irq, - .ack = wildfire_mask_and_ack_irq, - .end = wildfire_end_irq, + .unmask = wildfire_enable_irq, + .mask = wildfire_disable_irq, + .mask_ack = wildfire_mask_and_ack_irq, }; static void __init @@ -198,15 +177,18 @@ wildfire_init_irq_per_pca(int qbbno, int pcano) for (i = 0; i < 16; ++i) { if (i == 2) continue; - irq_desc[i+irq_bias].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i+irq_bias].chip = &wildfire_irq_type; + irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type, + handle_level_irq); } - irq_desc[36+irq_bias].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[36+irq_bias].chip = &wildfire_irq_type; + irq_to_desc(36+irq_bias)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(36+irq_bias, &wildfire_irq_type, + handle_level_irq); for (i = 40; i < 64; ++i) { - irq_desc[i+irq_bias].status = IRQ_DISABLED | IRQ_LEVEL; - irq_desc[i+irq_bias].chip = &wildfire_irq_type; + irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type, + handle_level_irq); } setup_irq(32+irq_bias, &isa_enable); diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile index 9b72c59c95be..c0a83ab62b78 100644 --- a/arch/alpha/lib/Makefile +++ b/arch/alpha/lib/Makefile @@ -2,8 +2,8 @@ # Makefile for alpha-specific library files.. # -EXTRA_AFLAGS := $(KBUILD_CFLAGS) -EXTRA_CFLAGS := -Werror +asflags-y := $(KBUILD_CFLAGS) +ccflags-y := -Werror # Many of these routines have implementations tuned for ev6. # Choose them iff we're targeting ev6 specifically. diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile index 359ef087e69e..7f4671995245 100644 --- a/arch/alpha/math-emu/Makefile +++ b/arch/alpha/math-emu/Makefile @@ -2,7 +2,7 @@ # Makefile for the FPU instruction emulation. # -EXTRA_CFLAGS := -w +ccflags-y := -w obj-$(CONFIG_MATHEMU) += math-emu.o diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile index 09399c5386cb..c993d3f93cf6 100644 --- a/arch/alpha/mm/Makefile +++ b/arch/alpha/mm/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux alpha-specific parts of the memory manager. # -EXTRA_CFLAGS := -Werror +ccflags-y := -Werror obj-y := init.o fault.o extable.o diff --git a/arch/alpha/oprofile/Makefile b/arch/alpha/oprofile/Makefile index 4aa56247bdc6..3473de751b03 100644 --- a/arch/alpha/oprofile/Makefile +++ b/arch/alpha/oprofile/Makefile @@ -1,4 +1,4 @@ -EXTRA_CFLAGS := -Werror -Wno-sign-compare +ccflags-y := -Werror -Wno-sign-compare obj-$(CONFIG_OPROFILE) += oprofile.o diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index bd0495a9ac3b..22cc8c8df6cb 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -179,6 +179,22 @@ static struct omap_board_config_kernel ams_delta_config[] = { { OMAP_TAG_LCD, &ams_delta_lcd_config }, }; +static struct resource ams_delta_nand_resources[] = { + [0] = { + .start = OMAP1_MPUIO_BASE, + .end = OMAP1_MPUIO_BASE + + OMAP_MPUIO_IO_CNTL + sizeof(u32) - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device ams_delta_nand_device = { + .name = "ams-delta-nand", + .id = -1, + .num_resources = ARRAY_SIZE(ams_delta_nand_resources), + .resource = ams_delta_nand_resources, +}; + static struct resource ams_delta_kp_resources[] = { [0] = { .start = INT_KEYBOARD, @@ -265,6 +281,7 @@ static struct omap1_cam_platform_data ams_delta_camera_platform_data = { }; static struct platform_device *ams_delta_devices[] __initdata = { + &ams_delta_nand_device, &ams_delta_kp_device, &ams_delta_lcd_device, &ams_delta_led_device, diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h index 74b62f10d07f..4d6dd4c39b75 100644 --- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h +++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h @@ -13,6 +13,14 @@ #include <linux/workqueue.h> #include <linux/interrupt.h> +/* + * Maxium size for a single dma descriptor + * Size is limited to 16 bits. + * Size is in the units of addr-widths (1,2,4,8 bytes) + * Larger transfers will be split up to multiple linked desc + */ +#define STEDMA40_MAX_SEG_SIZE 0xFFFF + /* dev types for memcpy */ #define STEDMA40_DEV_DST_MEMORY (-1) #define STEDMA40_DEV_SRC_MEMORY (-1) diff --git a/arch/arm/plat-omap/include/plat/onenand.h b/arch/arm/plat-omap/include/plat/onenand.h index 72f433d7d827..affe87e9ece7 100644 --- a/arch/arm/plat-omap/include/plat/onenand.h +++ b/arch/arm/plat-omap/include/plat/onenand.h @@ -23,6 +23,7 @@ struct omap_onenand_platform_data { int (*onenand_setup)(void __iomem *, int freq); int dma_channel; u8 flags; + u8 regulator_can_sleep; }; #define ONENAND_MAX_PARTITIONS 8 diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index c881393f07fd..bceaa5543e39 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -47,9 +47,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) memblock_add(base, size); } -u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return memblock_alloc(size, align); + return __va(memblock_alloc(size, align)); } #ifdef CONFIG_EARLY_PRINTK diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 9dbe58368953..a19811e98a41 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -45,11 +45,9 @@ void __init free_mem_mach(unsigned long addr, unsigned long size) return free_bootmem(addr, size); } -u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return virt_to_phys( - __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)) - ); + return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9e3132db718b..7185f0da7dc3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -519,9 +519,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) memblock_add(base, size); } -u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return memblock_alloc(size, align); + return __va(memblock_alloc(size, align)); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index ef138731c0ea..1c28816152fa 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -200,11 +200,16 @@ config PL330_DMA platform_data for a dma-pl330 device. config PCH_DMA - tristate "Topcliff (Intel EG20T) PCH DMA support" + tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7213 IOH DMA support" depends on PCI && X86 select DMA_ENGINE help - Enable support for the Topcliff (Intel EG20T) PCH DMA engine. + Enable support for Intel EG20T PCH DMA engine. + + This driver also can be used for OKI SEMICONDUCTOR ML7213 IOH(Input/ + Output Hub) which is for IVI(In-Vehicle Infotainment) use. + ML7213 is companion chip for Intel Atom E6xx series. + ML7213 is completely compatible for Intel EG20T PCH. config IMX_SDMA tristate "i.MX SDMA support" diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index b605cc9ac3a2..297f48b0cba9 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -19,14 +19,14 @@ * this program; if not, write to the Free Software Foundation, Inc., 59 * Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * The full GNU General Public License is iin this distribution in the - * file called COPYING. + * The full GNU General Public License is in this distribution in the file + * called COPYING. * * Documentation: ARM DDI 0196G == PL080 - * Documentation: ARM DDI 0218E == PL081 + * Documentation: ARM DDI 0218E == PL081 * - * PL080 & PL081 both have 16 sets of DMA signals that can be routed to - * any channel. + * PL080 & PL081 both have 16 sets of DMA signals that can be routed to any + * channel. * * The PL080 has 8 channels available for simultaneous use, and the PL081 * has only two channels. So on these DMA controllers the number of channels @@ -53,7 +53,23 @@ * * ASSUMES default (little) endianness for DMA transfers * - * Only DMAC flow control is implemented + * The PL08x has two flow control settings: + * - DMAC flow control: the transfer size defines the number of transfers + * which occur for the current LLI entry, and the DMAC raises TC at the + * end of every LLI entry. Observed behaviour shows the DMAC listening + * to both the BREQ and SREQ signals (contrary to documented), + * transferring data if either is active. The LBREQ and LSREQ signals + * are ignored. + * + * - Peripheral flow control: the transfer size is ignored (and should be + * zero). The data is transferred from the current LLI entry, until + * after the final transfer signalled by LBREQ or LSREQ. The DMAC + * will then move to the next LLI entry. + * + * Only the former works sanely with scatter lists, so we only implement + * the DMAC flow control method. However, peripherals which use the LBREQ + * and LSREQ signals (eg, MMCI) are unable to use this mode, which through + * these hardware restrictions prevents them from using scatter DMA. * * Global TODO: * - Break out common code from arch/arm/mach-s3c64xx and share @@ -61,50 +77,39 @@ #include <linux/device.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/pci.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/dmapool.h> -#include <linux/amba/bus.h> #include <linux/dmaengine.h> +#include <linux/amba/bus.h> #include <linux/amba/pl08x.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <asm/hardware/pl080.h> -#include <asm/dma.h> -#include <asm/mach/dma.h> -#include <asm/atomic.h> -#include <asm/processor.h> -#include <asm/cacheflush.h> #define DRIVER_NAME "pl08xdmac" /** - * struct vendor_data - vendor-specific config parameters - * for PL08x derivates - * @name: the name of this specific variant + * struct vendor_data - vendor-specific config parameters for PL08x derivatives * @channels: the number of channels available in this variant - * @dualmaster: whether this version supports dual AHB masters - * or not. + * @dualmaster: whether this version supports dual AHB masters or not. */ struct vendor_data { - char *name; u8 channels; bool dualmaster; }; /* * PL08X private data structures - * An LLI struct - see pl08x TRM - * Note that next uses bit[0] as a bus bit, - * start & end do not - their bus bit info - * is in cctl + * An LLI struct - see PL08x TRM. Note that next uses bit[0] as a bus bit, + * start & end do not - their bus bit info is in cctl. Also note that these + * are fixed 32-bit quantities. */ -struct lli { - dma_addr_t src; - dma_addr_t dst; - dma_addr_t next; +struct pl08x_lli { + u32 src; + u32 dst; + u32 lli; u32 cctl; }; @@ -119,6 +124,8 @@ struct lli { * @phy_chans: array of data for the physical channels * @pool: a pool for the LLI descriptors * @pool_ctr: counter of LLIs in the pool + * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI fetches + * @mem_buses: set to indicate memory transfers on AHB2. * @lock: a spinlock for this struct */ struct pl08x_driver_data { @@ -126,11 +133,13 @@ struct pl08x_driver_data { struct dma_device memcpy; void __iomem *base; struct amba_device *adev; - struct vendor_data *vd; + const struct vendor_data *vd; struct pl08x_platform_data *pd; struct pl08x_phy_chan *phy_chans; struct dma_pool *pool; int pool_ctr; + u8 lli_buses; + u8 mem_buses; spinlock_t lock; }; @@ -152,9 +161,9 @@ struct pl08x_driver_data { /* Size (bytes) of each LLI buffer allocated for one transfer */ # define PL08X_LLI_TSFR_SIZE 0x2000 -/* Maximimum times we call dma_pool_alloc on this pool without freeing */ +/* Maximum times we call dma_pool_alloc on this pool without freeing */ #define PL08X_MAX_ALLOCS 0x40 -#define MAX_NUM_TSFR_LLIS (PL08X_LLI_TSFR_SIZE/sizeof(struct lli)) +#define MAX_NUM_TSFR_LLIS (PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli)) #define PL08X_ALIGN 8 static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan) @@ -162,6 +171,11 @@ static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan) return container_of(chan, struct pl08x_dma_chan, chan); } +static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct pl08x_txd, tx); +} + /* * Physical channel handling */ @@ -177,88 +191,47 @@ static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch) /* * Set the initial DMA register values i.e. those for the first LLI - * The next lli pointer and the configuration interrupt bit have - * been set when the LLIs were constructed + * The next LLI pointer and the configuration interrupt bit have + * been set when the LLIs were constructed. Poke them into the hardware + * and start the transfer. */ -static void pl08x_set_cregs(struct pl08x_driver_data *pl08x, - struct pl08x_phy_chan *ch) -{ - /* Wait for channel inactive */ - while (pl08x_phy_channel_busy(ch)) - ; - - dev_vdbg(&pl08x->adev->dev, - "WRITE channel %d: csrc=%08x, cdst=%08x, " - "cctl=%08x, clli=%08x, ccfg=%08x\n", - ch->id, - ch->csrc, - ch->cdst, - ch->cctl, - ch->clli, - ch->ccfg); - - writel(ch->csrc, ch->base + PL080_CH_SRC_ADDR); - writel(ch->cdst, ch->base + PL080_CH_DST_ADDR); - writel(ch->clli, ch->base + PL080_CH_LLI); - writel(ch->cctl, ch->base + PL080_CH_CONTROL); - writel(ch->ccfg, ch->base + PL080_CH_CONFIG); -} - -static inline void pl08x_config_phychan_for_txd(struct pl08x_dma_chan *plchan) +static void pl08x_start_txd(struct pl08x_dma_chan *plchan, + struct pl08x_txd *txd) { - struct pl08x_channel_data *cd = plchan->cd; + struct pl08x_driver_data *pl08x = plchan->host; struct pl08x_phy_chan *phychan = plchan->phychan; - struct pl08x_txd *txd = plchan->at; - - /* Copy the basic control register calculated at transfer config */ - phychan->csrc = txd->csrc; - phychan->cdst = txd->cdst; - phychan->clli = txd->clli; - phychan->cctl = txd->cctl; - - /* Assign the signal to the proper control registers */ - phychan->ccfg = cd->ccfg; - phychan->ccfg &= ~PL080_CONFIG_SRC_SEL_MASK; - phychan->ccfg &= ~PL080_CONFIG_DST_SEL_MASK; - /* If it wasn't set from AMBA, ignore it */ - if (txd->direction == DMA_TO_DEVICE) - /* Select signal as destination */ - phychan->ccfg |= - (phychan->signal << PL080_CONFIG_DST_SEL_SHIFT); - else if (txd->direction == DMA_FROM_DEVICE) - /* Select signal as source */ - phychan->ccfg |= - (phychan->signal << PL080_CONFIG_SRC_SEL_SHIFT); - /* Always enable error interrupts */ - phychan->ccfg |= PL080_CONFIG_ERR_IRQ_MASK; - /* Always enable terminal interrupts */ - phychan->ccfg |= PL080_CONFIG_TC_IRQ_MASK; -} - -/* - * Enable the DMA channel - * Assumes all other configuration bits have been set - * as desired before this code is called - */ -static void pl08x_enable_phy_chan(struct pl08x_driver_data *pl08x, - struct pl08x_phy_chan *ch) -{ + struct pl08x_lli *lli = &txd->llis_va[0]; u32 val; - /* - * Do not access config register until channel shows as disabled - */ - while (readl(pl08x->base + PL080_EN_CHAN) & (1 << ch->id)) - ; + plchan->at = txd; - /* - * Do not access config register until channel shows as inactive - */ - val = readl(ch->base + PL080_CH_CONFIG); + /* Wait for channel inactive */ + while (pl08x_phy_channel_busy(phychan)) + cpu_relax(); + + dev_vdbg(&pl08x->adev->dev, + "WRITE channel %d: csrc=0x%08x, cdst=0x%08x, " + "clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n", + phychan->id, lli->src, lli->dst, lli->lli, lli->cctl, + txd->ccfg); + + writel(lli->src, phychan->base + PL080_CH_SRC_ADDR); + writel(lli->dst, phychan->base + PL080_CH_DST_ADDR); + writel(lli->lli, phychan->base + PL080_CH_LLI); + writel(lli->cctl, phychan->base + PL080_CH_CONTROL); + writel(txd->ccfg, phychan->base + PL080_CH_CONFIG); + + /* Enable the DMA channel */ + /* Do not access config register until channel shows as disabled */ + while (readl(pl08x->base + PL080_EN_CHAN) & (1 << phychan->id)) + cpu_relax(); + + /* Do not access config register until channel shows as inactive */ + val = readl(phychan->base + PL080_CH_CONFIG); while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE)) - val = readl(ch->base + PL080_CH_CONFIG); + val = readl(phychan->base + PL080_CH_CONFIG); - writel(val | PL080_CONFIG_ENABLE, ch->base + PL080_CH_CONFIG); + writel(val | PL080_CONFIG_ENABLE, phychan->base + PL080_CH_CONFIG); } /* @@ -266,10 +239,8 @@ static void pl08x_enable_phy_chan(struct pl08x_driver_data *pl08x, * * Disabling individual channels could lose data. * - * Disable the peripheral DMA after disabling the DMAC - * in order to allow the DMAC FIFO to drain, and - * hence allow the channel to show inactive - * + * Disable the peripheral DMA after disabling the DMAC in order to allow + * the DMAC FIFO to drain, and hence allow the channel to show inactive */ static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch) { @@ -282,7 +253,7 @@ static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch) /* Wait for channel inactive */ while (pl08x_phy_channel_busy(ch)) - ; + cpu_relax(); } static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch) @@ -333,54 +304,56 @@ static inline u32 get_bytes_in_cctl(u32 cctl) static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan) { struct pl08x_phy_chan *ch; - struct pl08x_txd *txdi = NULL; struct pl08x_txd *txd; unsigned long flags; - u32 bytes = 0; + size_t bytes = 0; spin_lock_irqsave(&plchan->lock, flags); - ch = plchan->phychan; txd = plchan->at; /* - * Next follow the LLIs to get the number of pending bytes in the - * currently active transaction. + * Follow the LLIs to get the number of remaining + * bytes in the currently active transaction. */ if (ch && txd) { - struct lli *llis_va = txd->llis_va; - struct lli *llis_bus = (struct lli *) txd->llis_bus; - u32 clli = readl(ch->base + PL080_CH_LLI); + u32 clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2; - /* First get the bytes in the current active LLI */ + /* First get the remaining bytes in the active transfer */ bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL)); if (clli) { - int i = 0; + struct pl08x_lli *llis_va = txd->llis_va; + dma_addr_t llis_bus = txd->llis_bus; + int index; + + BUG_ON(clli < llis_bus || clli >= llis_bus + + sizeof(struct pl08x_lli) * MAX_NUM_TSFR_LLIS); + + /* + * Locate the next LLI - as this is an array, + * it's simple maths to find. + */ + index = (clli - llis_bus) / sizeof(struct pl08x_lli); - /* Forward to the LLI pointed to by clli */ - while ((clli != (u32) &(llis_bus[i])) && - (i < MAX_NUM_TSFR_LLIS)) - i++; + for (; index < MAX_NUM_TSFR_LLIS; index++) { + bytes += get_bytes_in_cctl(llis_va[index].cctl); - while (clli) { - bytes += get_bytes_in_cctl(llis_va[i].cctl); /* - * A clli of 0x00000000 will terminate the - * LLI list + * A LLI pointer of 0 terminates the LLI list */ - clli = llis_va[i].next; - i++; + if (!llis_va[index].lli) + break; } } } /* Sum up all queued transactions */ - if (!list_empty(&plchan->desc_list)) { - list_for_each_entry(txdi, &plchan->desc_list, node) { + if (!list_empty(&plchan->pend_list)) { + struct pl08x_txd *txdi; + list_for_each_entry(txdi, &plchan->pend_list, node) { bytes += txdi->len; } - } spin_unlock_irqrestore(&plchan->lock, flags); @@ -390,6 +363,10 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan) /* * Allocate a physical channel for a virtual channel + * + * Try to locate a physical channel to be used for this transfer. If all + * are taken return NULL and the requester will have to cope by using + * some fallback PIO mode or retrying later. */ static struct pl08x_phy_chan * pl08x_get_phy_channel(struct pl08x_driver_data *pl08x, @@ -399,12 +376,6 @@ pl08x_get_phy_channel(struct pl08x_driver_data *pl08x, unsigned long flags; int i; - /* - * Try to locate a physical channel to be used for - * this transfer. If all are taken return NULL and - * the requester will have to cope by using some fallback - * PIO mode or retrying later. - */ for (i = 0; i < pl08x->vd->channels; i++) { ch = &pl08x->phy_chans[i]; @@ -465,11 +436,11 @@ static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded) } static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth, - u32 tsize) + size_t tsize) { u32 retbits = cctl; - /* Remove all src, dst and transfersize bits */ + /* Remove all src, dst and transfer size bits */ retbits &= ~PL080_CONTROL_DWIDTH_MASK; retbits &= ~PL080_CONTROL_SWIDTH_MASK; retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK; @@ -509,95 +480,87 @@ static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth, return retbits; } +struct pl08x_lli_build_data { + struct pl08x_txd *txd; + struct pl08x_driver_data *pl08x; + struct pl08x_bus_data srcbus; + struct pl08x_bus_data dstbus; + size_t remainder; +}; + /* - * Autoselect a master bus to use for the transfer - * this prefers the destination bus if both available - * if fixed address on one bus the other will be chosen + * Autoselect a master bus to use for the transfer this prefers the + * destination bus if both available if fixed address on one bus the + * other will be chosen */ -void pl08x_choose_master_bus(struct pl08x_bus_data *src_bus, - struct pl08x_bus_data *dst_bus, struct pl08x_bus_data **mbus, - struct pl08x_bus_data **sbus, u32 cctl) +static void pl08x_choose_master_bus(struct pl08x_lli_build_data *bd, + struct pl08x_bus_data **mbus, struct pl08x_bus_data **sbus, u32 cctl) { if (!(cctl & PL080_CONTROL_DST_INCR)) { - *mbus = src_bus; - *sbus = dst_bus; + *mbus = &bd->srcbus; + *sbus = &bd->dstbus; } else if (!(cctl & PL080_CONTROL_SRC_INCR)) { - *mbus = dst_bus; - *sbus = src_bus; + *mbus = &bd->dstbus; + *sbus = &bd->srcbus; } else { - if (dst_bus->buswidth == 4) { - *mbus = dst_bus; - *sbus = src_bus; - } else if (src_bus->buswidth == 4) { - *mbus = src_bus; - *sbus = dst_bus; - } else if (dst_bus->buswidth == 2) { - *mbus = dst_bus; - *sbus = src_bus; - } else if (src_bus->buswidth == 2) { - *mbus = src_bus; - *sbus = dst_bus; + if (bd->dstbus.buswidth == 4) { + *mbus = &bd->dstbus; + *sbus = &bd->srcbus; + } else if (bd->srcbus.buswidth == 4) { + *mbus = &bd->srcbus; + *sbus = &bd->dstbus; + } else if (bd->dstbus.buswidth == 2) { + *mbus = &bd->dstbus; + *sbus = &bd->srcbus; + } else if (bd->srcbus.buswidth == 2) { + *mbus = &bd->srcbus; + *sbus = &bd->dstbus; } else { - /* src_bus->buswidth == 1 */ - *mbus = dst_bus; - *sbus = src_bus; + /* bd->srcbus.buswidth == 1 */ + *mbus = &bd->dstbus; + *sbus = &bd->srcbus; } } } /* - * Fills in one LLI for a certain transfer descriptor - * and advance the counter + * Fills in one LLI for a certain transfer descriptor and advance the counter */ -int pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x, - struct pl08x_txd *txd, int num_llis, int len, - u32 cctl, u32 *remainder) +static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd, + int num_llis, int len, u32 cctl) { - struct lli *llis_va = txd->llis_va; - struct lli *llis_bus = (struct lli *) txd->llis_bus; + struct pl08x_lli *llis_va = bd->txd->llis_va; + dma_addr_t llis_bus = bd->txd->llis_bus; BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS); - llis_va[num_llis].cctl = cctl; - llis_va[num_llis].src = txd->srcbus.addr; - llis_va[num_llis].dst = txd->dstbus.addr; - - /* - * On versions with dual masters, you can optionally AND on - * PL080_LLI_LM_AHB2 to the LLI to tell the hardware to read - * in new LLIs with that controller, but we always try to - * choose AHB1 to point into memory. The idea is to have AHB2 - * fixed on the peripheral and AHB1 messing around in the - * memory. So we don't manipulate this bit currently. - */ - - llis_va[num_llis].next = - (dma_addr_t)((u32) &(llis_bus[num_llis + 1])); + llis_va[num_llis].cctl = cctl; + llis_va[num_llis].src = bd->srcbus.addr; + llis_va[num_llis].dst = bd->dstbus.addr; + llis_va[num_llis].lli = llis_bus + (num_llis + 1) * sizeof(struct pl08x_lli); + if (bd->pl08x->lli_buses & PL08X_AHB2) + llis_va[num_llis].lli |= PL080_LLI_LM_AHB2; if (cctl & PL080_CONTROL_SRC_INCR) - txd->srcbus.addr += len; + bd->srcbus.addr += len; if (cctl & PL080_CONTROL_DST_INCR) - txd->dstbus.addr += len; + bd->dstbus.addr += len; - *remainder -= len; + BUG_ON(bd->remainder < len); - return num_llis + 1; + bd->remainder -= len; } /* - * Return number of bytes to fill to boundary, or len + * Return number of bytes to fill to boundary, or len. + * This calculation works for any value of addr. */ -static inline u32 pl08x_pre_boundary(u32 addr, u32 len) +static inline size_t pl08x_pre_boundary(u32 addr, size_t len) { - u32 boundary; - - boundary = ((addr >> PL08X_BOUNDARY_SHIFT) + 1) - << PL08X_BOUNDARY_SHIFT; + size_t boundary_len = PL08X_BOUNDARY_SIZE - + (addr & (PL08X_BOUNDARY_SIZE - 1)); - if (boundary < addr + len) - return boundary - addr; - else - return len; + return min(boundary_len, len); } /* @@ -608,20 +571,13 @@ static inline u32 pl08x_pre_boundary(u32 addr, u32 len) static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, struct pl08x_txd *txd) { - struct pl08x_channel_data *cd = txd->cd; struct pl08x_bus_data *mbus, *sbus; - u32 remainder; + struct pl08x_lli_build_data bd; int num_llis = 0; u32 cctl; - int max_bytes_per_lli; - int total_bytes = 0; - struct lli *llis_va; - struct lli *llis_bus; - - if (!txd) { - dev_err(&pl08x->adev->dev, "%s no descriptor\n", __func__); - return 0; - } + size_t max_bytes_per_lli; + size_t total_bytes = 0; + struct pl08x_lli *llis_va; txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, &txd->llis_bus); @@ -632,121 +588,79 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, pl08x->pool_ctr++; - /* - * Initialize bus values for this transfer - * from the passed optimal values - */ - if (!cd) { - dev_err(&pl08x->adev->dev, "%s no channel data\n", __func__); - return 0; - } + /* Get the default CCTL */ + cctl = txd->cctl; - /* Get the default CCTL from the platform data */ - cctl = cd->cctl; - - /* - * On the PL080 we have two bus masters and we - * should select one for source and one for - * destination. We try to use AHB2 for the - * bus which does not increment (typically the - * peripheral) else we just choose something. - */ - cctl &= ~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2); - if (pl08x->vd->dualmaster) { - if (cctl & PL080_CONTROL_SRC_INCR) - /* Source increments, use AHB2 for destination */ - cctl |= PL080_CONTROL_DST_AHB2; - else if (cctl & PL080_CONTROL_DST_INCR) - /* Destination increments, use AHB2 for source */ - cctl |= PL080_CONTROL_SRC_AHB2; - else - /* Just pick something, source AHB1 dest AHB2 */ - cctl |= PL080_CONTROL_DST_AHB2; - } + bd.txd = txd; + bd.pl08x = pl08x; + bd.srcbus.addr = txd->src_addr; + bd.dstbus.addr = txd->dst_addr; /* Find maximum width of the source bus */ - txd->srcbus.maxwidth = + bd.srcbus.maxwidth = pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >> PL080_CONTROL_SWIDTH_SHIFT); /* Find maximum width of the destination bus */ - txd->dstbus.maxwidth = + bd.dstbus.maxwidth = pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >> PL080_CONTROL_DWIDTH_SHIFT); /* Set up the bus widths to the maximum */ - txd->srcbus.buswidth = txd->srcbus.maxwidth; - txd->dstbus.buswidth = txd->dstbus.maxwidth; + bd.srcbus.buswidth = bd.srcbus.maxwidth; + bd.dstbus.buswidth = bd.dstbus.maxwidth; dev_vdbg(&pl08x->adev->dev, "%s source bus is %d bytes wide, dest bus is %d bytes wide\n", - __func__, txd->srcbus.buswidth, txd->dstbus.buswidth); + __func__, bd.srcbus.buswidth, bd.dstbus.buswidth); /* * Bytes transferred == tsize * MIN(buswidths), not max(buswidths) */ - max_bytes_per_lli = min(txd->srcbus.buswidth, txd->dstbus.buswidth) * + max_bytes_per_lli = min(bd.srcbus.buswidth, bd.dstbus.buswidth) * PL080_CONTROL_TRANSFER_SIZE_MASK; dev_vdbg(&pl08x->adev->dev, - "%s max bytes per lli = %d\n", + "%s max bytes per lli = %zu\n", __func__, max_bytes_per_lli); /* We need to count this down to zero */ - remainder = txd->len; + bd.remainder = txd->len; dev_vdbg(&pl08x->adev->dev, - "%s remainder = %d\n", - __func__, remainder); + "%s remainder = %zu\n", + __func__, bd.remainder); /* * Choose bus to align to * - prefers destination bus if both available * - if fixed address on one bus chooses other - * - modifies cctl to choose an apropriate master - */ - pl08x_choose_master_bus(&txd->srcbus, &txd->dstbus, - &mbus, &sbus, cctl); - - - /* - * The lowest bit of the LLI register - * is also used to indicate which master to - * use for reading the LLIs. */ + pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl); if (txd->len < mbus->buswidth) { - /* - * Less than a bus width available - * - send as single bytes - */ - while (remainder) { + /* Less than a bus width available - send as single bytes */ + while (bd.remainder) { dev_vdbg(&pl08x->adev->dev, "%s single byte LLIs for a transfer of " - "less than a bus width (remain %08x)\n", - __func__, remainder); + "less than a bus width (remain 0x%08x)\n", + __func__, bd.remainder); cctl = pl08x_cctl_bits(cctl, 1, 1, 1); - num_llis = - pl08x_fill_lli_for_desc(pl08x, txd, num_llis, 1, - cctl, &remainder); + pl08x_fill_lli_for_desc(&bd, num_llis++, 1, cctl); total_bytes++; } } else { - /* - * Make one byte LLIs until master bus is aligned - * - slave will then be aligned also - */ + /* Make one byte LLIs until master bus is aligned */ while ((mbus->addr) % (mbus->buswidth)) { dev_vdbg(&pl08x->adev->dev, "%s adjustment lli for less than bus width " - "(remain %08x)\n", - __func__, remainder); + "(remain 0x%08x)\n", + __func__, bd.remainder); cctl = pl08x_cctl_bits(cctl, 1, 1, 1); - num_llis = pl08x_fill_lli_for_desc - (pl08x, txd, num_llis, 1, cctl, &remainder); + pl08x_fill_lli_for_desc(&bd, num_llis++, 1, cctl); total_bytes++; } /* - * Master now aligned + * Master now aligned * - if slave is not then we must set its width down */ if (sbus->addr % sbus->buswidth) { @@ -761,63 +675,51 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, * Make largest possible LLIs until less than one bus * width left */ - while (remainder > (mbus->buswidth - 1)) { - int lli_len, target_len; - int tsize; - int odd_bytes; + while (bd.remainder > (mbus->buswidth - 1)) { + size_t lli_len, target_len, tsize, odd_bytes; /* * If enough left try to send max possible, * otherwise try to send the remainder */ - target_len = remainder; - if (remainder > max_bytes_per_lli) - target_len = max_bytes_per_lli; + target_len = min(bd.remainder, max_bytes_per_lli); /* - * Set bus lengths for incrementing busses - * to number of bytes which fill to next memory - * boundary + * Set bus lengths for incrementing buses to the + * number of bytes which fill to next memory boundary, + * limiting on the target length calculated above. */ if (cctl & PL080_CONTROL_SRC_INCR) - txd->srcbus.fill_bytes = - pl08x_pre_boundary( - txd->srcbus.addr, - remainder); + bd.srcbus.fill_bytes = + pl08x_pre_boundary(bd.srcbus.addr, + target_len); else - txd->srcbus.fill_bytes = - max_bytes_per_lli; + bd.srcbus.fill_bytes = target_len; if (cctl & PL080_CONTROL_DST_INCR) - txd->dstbus.fill_bytes = - pl08x_pre_boundary( - txd->dstbus.addr, - remainder); + bd.dstbus.fill_bytes = + pl08x_pre_boundary(bd.dstbus.addr, + target_len); else - txd->dstbus.fill_bytes = - max_bytes_per_lli; + bd.dstbus.fill_bytes = target_len; - /* - * Find the nearest - */ - lli_len = min(txd->srcbus.fill_bytes, - txd->dstbus.fill_bytes); + /* Find the nearest */ + lli_len = min(bd.srcbus.fill_bytes, + bd.dstbus.fill_bytes); - BUG_ON(lli_len > remainder); + BUG_ON(lli_len > bd.remainder); if (lli_len <= 0) { dev_err(&pl08x->adev->dev, - "%s lli_len is %d, <= 0\n", + "%s lli_len is %zu, <= 0\n", __func__, lli_len); return 0; } if (lli_len == target_len) { /* - * Can send what we wanted - */ - /* - * Maintain alignment + * Can send what we wanted. + * Maintain alignment */ lli_len = (lli_len/mbus->buswidth) * mbus->buswidth; @@ -825,17 +727,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, } else { /* * So now we know how many bytes to transfer - * to get to the nearest boundary - * The next lli will past the boundary - * - however we may be working to a boundary - * on the slave bus - * We need to ensure the master stays aligned + * to get to the nearest boundary. The next + * LLI will past the boundary. However, we + * may be working to a boundary on the slave + * bus. We need to ensure the master stays + * aligned, and that we are working in + * multiples of the bus widths. */ odd_bytes = lli_len % mbus->buswidth; - /* - * - and that we are working in multiples - * of the bus widths - */ lli_len -= odd_bytes; } @@ -855,41 +754,38 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, if (target_len != lli_len) { dev_vdbg(&pl08x->adev->dev, - "%s can't send what we want. Desired %08x, lli of %08x bytes in txd of %08x\n", + "%s can't send what we want. Desired 0x%08zx, lli of 0x%08zx bytes in txd of 0x%08zx\n", __func__, target_len, lli_len, txd->len); } cctl = pl08x_cctl_bits(cctl, - txd->srcbus.buswidth, - txd->dstbus.buswidth, + bd.srcbus.buswidth, + bd.dstbus.buswidth, tsize); dev_vdbg(&pl08x->adev->dev, - "%s fill lli with single lli chunk of size %08x (remainder %08x)\n", - __func__, lli_len, remainder); - num_llis = pl08x_fill_lli_for_desc(pl08x, txd, - num_llis, lli_len, cctl, - &remainder); + "%s fill lli with single lli chunk of size 0x%08zx (remainder 0x%08zx)\n", + __func__, lli_len, bd.remainder); + pl08x_fill_lli_for_desc(&bd, num_llis++, + lli_len, cctl); total_bytes += lli_len; } if (odd_bytes) { /* - * Creep past the boundary, - * maintaining master alignment + * Creep past the boundary, maintaining + * master alignment */ int j; for (j = 0; (j < mbus->buswidth) - && (remainder); j++) { + && (bd.remainder); j++) { cctl = pl08x_cctl_bits(cctl, 1, 1, 1); dev_vdbg(&pl08x->adev->dev, - "%s align with boundardy, single byte (remain %08x)\n", - __func__, remainder); - num_llis = - pl08x_fill_lli_for_desc(pl08x, - txd, num_llis, 1, - cctl, &remainder); + "%s align with boundary, single byte (remain 0x%08zx)\n", + __func__, bd.remainder); + pl08x_fill_lli_for_desc(&bd, + num_llis++, 1, cctl); total_bytes++; } } @@ -898,25 +794,18 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, /* * Send any odd bytes */ - if (remainder < 0) { - dev_err(&pl08x->adev->dev, "%s remainder not fitted 0x%08x bytes\n", - __func__, remainder); - return 0; - } - - while (remainder) { + while (bd.remainder) { cctl = pl08x_cctl_bits(cctl, 1, 1, 1); dev_vdbg(&pl08x->adev->dev, - "%s align with boundardy, single odd byte (remain %d)\n", - __func__, remainder); - num_llis = pl08x_fill_lli_for_desc(pl08x, txd, num_llis, - 1, cctl, &remainder); + "%s align with boundary, single odd byte (remain %zu)\n", + __func__, bd.remainder); + pl08x_fill_lli_for_desc(&bd, num_llis++, 1, cctl); total_bytes++; } } if (total_bytes != txd->len) { dev_err(&pl08x->adev->dev, - "%s size of encoded lli:s don't match total txd, transferred 0x%08x from size 0x%08x\n", + "%s size of encoded lli:s don't match total txd, transferred 0x%08zx from size 0x%08zx\n", __func__, total_bytes, txd->len); return 0; } @@ -927,41 +816,12 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, __func__, (u32) MAX_NUM_TSFR_LLIS); return 0; } - /* - * Decide whether this is a loop or a terminated transfer - */ - llis_va = txd->llis_va; - llis_bus = (struct lli *) txd->llis_bus; - if (cd->circular_buffer) { - /* - * Loop the circular buffer so that the next element - * points back to the beginning of the LLI. - */ - llis_va[num_llis - 1].next = - (dma_addr_t)((unsigned int)&(llis_bus[0])); - } else { - /* - * On non-circular buffers, the final LLI terminates - * the LLI. - */ - llis_va[num_llis - 1].next = 0; - /* - * The final LLI element shall also fire an interrupt - */ - llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN; - } - - /* Now store the channel register values */ - txd->csrc = llis_va[0].src; - txd->cdst = llis_va[0].dst; - if (num_llis > 1) - txd->clli = llis_va[0].next; - else - txd->clli = 0; - - txd->cctl = llis_va[0].cctl; - /* ccfg will be set at physical channel allocation time */ + llis_va = txd->llis_va; + /* The final LLI terminates the LLI. */ + llis_va[num_llis - 1].lli = 0; + /* The final LLI element shall also fire an interrupt. */ + llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN; #ifdef VERBOSE_DEBUG { @@ -969,13 +829,13 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, for (i = 0; i < num_llis; i++) { dev_vdbg(&pl08x->adev->dev, - "lli %d @%p: csrc=%08x, cdst=%08x, cctl=%08x, clli=%08x\n", + "lli %d @%p: csrc=0x%08x, cdst=0x%08x, cctl=0x%08x, clli=0x%08x\n", i, &llis_va[i], llis_va[i].src, llis_va[i].dst, llis_va[i].cctl, - llis_va[i].next + llis_va[i].lli ); } } @@ -988,14 +848,8 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, static void pl08x_free_txd(struct pl08x_driver_data *pl08x, struct pl08x_txd *txd) { - if (!txd) - dev_err(&pl08x->adev->dev, - "%s no descriptor to free\n", - __func__); - /* Free the LLI */ - dma_pool_free(pl08x->pool, txd->llis_va, - txd->llis_bus); + dma_pool_free(pl08x->pool, txd->llis_va, txd->llis_bus); pl08x->pool_ctr--; @@ -1008,13 +862,12 @@ static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x, struct pl08x_txd *txdi = NULL; struct pl08x_txd *next; - if (!list_empty(&plchan->desc_list)) { + if (!list_empty(&plchan->pend_list)) { list_for_each_entry_safe(txdi, - next, &plchan->desc_list, node) { + next, &plchan->pend_list, node) { list_del(&txdi->node); pl08x_free_txd(pl08x, txdi); } - } } @@ -1069,6 +922,12 @@ static int prep_phy_channel(struct pl08x_dma_chan *plchan, return -EBUSY; } ch->signal = ret; + + /* Assign the flow control signal to this channel */ + if (txd->direction == DMA_TO_DEVICE) + txd->ccfg |= ch->signal << PL080_CONFIG_DST_SEL_SHIFT; + else if (txd->direction == DMA_FROM_DEVICE) + txd->ccfg |= ch->signal << PL080_CONFIG_SRC_SEL_SHIFT; } dev_dbg(&pl08x->adev->dev, "allocated physical channel %d and signal %d for xfer on %s\n", @@ -1076,19 +935,54 @@ static int prep_phy_channel(struct pl08x_dma_chan *plchan, ch->signal, plchan->name); + plchan->phychan_hold++; plchan->phychan = ch; return 0; } +static void release_phy_channel(struct pl08x_dma_chan *plchan) +{ + struct pl08x_driver_data *pl08x = plchan->host; + + if ((plchan->phychan->signal >= 0) && pl08x->pd->put_signal) { + pl08x->pd->put_signal(plchan); + plchan->phychan->signal = -1; + } + pl08x_put_phy_channel(pl08x, plchan->phychan); + plchan->phychan = NULL; +} + static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx) { struct pl08x_dma_chan *plchan = to_pl08x_chan(tx->chan); + struct pl08x_txd *txd = to_pl08x_txd(tx); + unsigned long flags; - atomic_inc(&plchan->last_issued); - tx->cookie = atomic_read(&plchan->last_issued); - /* This unlock follows the lock in the prep() function */ - spin_unlock_irqrestore(&plchan->lock, plchan->lockflags); + spin_lock_irqsave(&plchan->lock, flags); + + plchan->chan.cookie += 1; + if (plchan->chan.cookie < 0) + plchan->chan.cookie = 1; + tx->cookie = plchan->chan.cookie; + + /* Put this onto the pending list */ + list_add_tail(&txd->node, &plchan->pend_list); + + /* + * If there was no physical channel available for this memcpy, + * stack the request up and indicate that the channel is waiting + * for a free physical channel. + */ + if (!plchan->slave && !plchan->phychan) { + /* Do this memcpy whenever there is a channel ready */ + plchan->state = PL08X_CHAN_WAITING; + plchan->waiting = txd; + } else { + plchan->phychan_hold--; + } + + spin_unlock_irqrestore(&plchan->lock, flags); return tx->cookie; } @@ -1102,10 +996,9 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt( } /* - * Code accessing dma_async_is_complete() in a tight loop - * may give problems - could schedule where indicated. - * If slaves are relying on interrupts to signal completion this - * function must not be called with interrupts disabled + * Code accessing dma_async_is_complete() in a tight loop may give problems. + * If slaves are relying on interrupts to signal completion this function + * must not be called with interrupts disabled. */ static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan, @@ -1118,7 +1011,7 @@ pl08x_dma_tx_status(struct dma_chan *chan, enum dma_status ret; u32 bytesleft = 0; - last_used = atomic_read(&plchan->last_issued); + last_used = plchan->chan.cookie; last_complete = plchan->lc; ret = dma_async_is_complete(cookie, last_complete, last_used); @@ -1128,13 +1021,9 @@ pl08x_dma_tx_status(struct dma_chan *chan, } /* - * schedule(); could be inserted here - */ - - /* * This cookie not complete yet */ - last_used = atomic_read(&plchan->last_issued); + last_used = plchan->chan.cookie; last_complete = plchan->lc; /* Get number of bytes left in the active transactions and queue */ @@ -1199,37 +1088,35 @@ static const struct burst_table burst_sizes[] = { }, }; -static void dma_set_runtime_config(struct dma_chan *chan, - struct dma_slave_config *config) +static int dma_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; struct pl08x_channel_data *cd = plchan->cd; enum dma_slave_buswidth addr_width; + dma_addr_t addr; u32 maxburst; u32 cctl = 0; - /* Mask out all except src and dst channel */ - u32 ccfg = cd->ccfg & 0x000003DEU; - int i = 0; + int i; + + if (!plchan->slave) + return -EINVAL; /* Transfer direction */ plchan->runtime_direction = config->direction; if (config->direction == DMA_TO_DEVICE) { - plchan->runtime_addr = config->dst_addr; - cctl |= PL080_CONTROL_SRC_INCR; - ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; + addr = config->dst_addr; addr_width = config->dst_addr_width; maxburst = config->dst_maxburst; } else if (config->direction == DMA_FROM_DEVICE) { - plchan->runtime_addr = config->src_addr; - cctl |= PL080_CONTROL_DST_INCR; - ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; + addr = config->src_addr; addr_width = config->src_addr_width; maxburst = config->src_maxburst; } else { dev_err(&pl08x->adev->dev, "bad runtime_config: alien transfer direction\n"); - return; + return -EINVAL; } switch (addr_width) { @@ -1248,42 +1135,40 @@ static void dma_set_runtime_config(struct dma_chan *chan, default: dev_err(&pl08x->adev->dev, "bad runtime_config: alien address width\n"); - return; + return -EINVAL; } /* * Now decide on a maxburst: - * If this channel will only request single transfers, set - * this down to ONE element. + * If this channel will only request single transfers, set this + * down to ONE element. Also select one element if no maxburst + * is specified. */ - if (plchan->cd->single) { + if (plchan->cd->single || maxburst == 0) { cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT); } else { - while (i < ARRAY_SIZE(burst_sizes)) { + for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) if (burst_sizes[i].burstwords <= maxburst) break; - i++; - } cctl |= burst_sizes[i].reg; } - /* Access the cell in privileged mode, non-bufferable, non-cacheable */ - cctl &= ~PL080_CONTROL_PROT_MASK; - cctl |= PL080_CONTROL_PROT_SYS; + plchan->runtime_addr = addr; /* Modify the default channel data to fit PrimeCell request */ cd->cctl = cctl; - cd->ccfg = ccfg; dev_dbg(&pl08x->adev->dev, "configured channel %s (%s) for %s, data width %d, " - "maxburst %d words, LE, CCTL=%08x, CCFG=%08x\n", + "maxburst %d words, LE, CCTL=0x%08x\n", dma_chan_name(chan), plchan->name, (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX", addr_width, maxburst, - cctl, ccfg); + cctl); + + return 0; } /* @@ -1293,35 +1178,26 @@ static void dma_set_runtime_config(struct dma_chan *chan, static void pl08x_issue_pending(struct dma_chan *chan) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); - struct pl08x_driver_data *pl08x = plchan->host; unsigned long flags; spin_lock_irqsave(&plchan->lock, flags); - /* Something is already active */ - if (plchan->at) { - spin_unlock_irqrestore(&plchan->lock, flags); - return; - } - - /* Didn't get a physical channel so waiting for it ... */ - if (plchan->state == PL08X_CHAN_WAITING) + /* Something is already active, or we're waiting for a channel... */ + if (plchan->at || plchan->state == PL08X_CHAN_WAITING) { + spin_unlock_irqrestore(&plchan->lock, flags); return; + } /* Take the first element in the queue and execute it */ - if (!list_empty(&plchan->desc_list)) { + if (!list_empty(&plchan->pend_list)) { struct pl08x_txd *next; - next = list_first_entry(&plchan->desc_list, + next = list_first_entry(&plchan->pend_list, struct pl08x_txd, node); list_del(&next->node); - plchan->at = next; plchan->state = PL08X_CHAN_RUNNING; - /* Configure the physical channel for the active txd */ - pl08x_config_phychan_for_txd(plchan); - pl08x_set_cregs(pl08x, plchan->phychan); - pl08x_enable_phy_chan(pl08x, plchan->phychan); + pl08x_start_txd(plchan, next); } spin_unlock_irqrestore(&plchan->lock, flags); @@ -1330,30 +1206,17 @@ static void pl08x_issue_pending(struct dma_chan *chan) static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan, struct pl08x_txd *txd) { - int num_llis; struct pl08x_driver_data *pl08x = plchan->host; - int ret; + unsigned long flags; + int num_llis, ret; num_llis = pl08x_fill_llis_for_desc(pl08x, txd); - - if (!num_llis) + if (!num_llis) { + kfree(txd); return -EINVAL; + } - spin_lock_irqsave(&plchan->lock, plchan->lockflags); - - /* - * If this device is not using a circular buffer then - * queue this new descriptor for transfer. - * The descriptor for a circular buffer continues - * to be used until the channel is freed. - */ - if (txd->cd->circular_buffer) - dev_err(&pl08x->adev->dev, - "%s attempting to queue a circular buffer\n", - __func__); - else - list_add_tail(&txd->node, - &plchan->desc_list); + spin_lock_irqsave(&plchan->lock, flags); /* * See if we already have a physical channel allocated, @@ -1362,45 +1225,74 @@ static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan, ret = prep_phy_channel(plchan, txd); if (ret) { /* - * No physical channel available, we will - * stack up the memcpy channels until there is a channel - * available to handle it whereas slave transfers may - * have been denied due to platform channel muxing restrictions - * and since there is no guarantee that this will ever be - * resolved, and since the signal must be aquired AFTER - * aquiring the physical channel, we will let them be NACK:ed - * with -EBUSY here. The drivers can alway retry the prep() - * call if they are eager on doing this using DMA. + * No physical channel was available. + * + * memcpy transfers can be sorted out at submission time. + * + * Slave transfers may have been denied due to platform + * channel muxing restrictions. Since there is no guarantee + * that this will ever be resolved, and the signal must be + * acquired AFTER acquiring the physical channel, we will let + * them be NACK:ed with -EBUSY here. The drivers can retry + * the prep() call if they are eager on doing this using DMA. */ if (plchan->slave) { pl08x_free_txd_list(pl08x, plchan); - spin_unlock_irqrestore(&plchan->lock, plchan->lockflags); + pl08x_free_txd(pl08x, txd); + spin_unlock_irqrestore(&plchan->lock, flags); return -EBUSY; } - /* Do this memcpy whenever there is a channel ready */ - plchan->state = PL08X_CHAN_WAITING; - plchan->waiting = txd; } else /* - * Else we're all set, paused and ready to roll, - * status will switch to PL08X_CHAN_RUNNING when - * we call issue_pending(). If there is something - * running on the channel already we don't change - * its state. + * Else we're all set, paused and ready to roll, status + * will switch to PL08X_CHAN_RUNNING when we call + * issue_pending(). If there is something running on the + * channel already we don't change its state. */ if (plchan->state == PL08X_CHAN_IDLE) plchan->state = PL08X_CHAN_PAUSED; - /* - * Notice that we leave plchan->lock locked on purpose: - * it will be unlocked in the subsequent tx_submit() - * call. This is a consequence of the current API. - */ + spin_unlock_irqrestore(&plchan->lock, flags); return 0; } /* + * Given the source and destination available bus masks, select which + * will be routed to each port. We try to have source and destination + * on separate ports, but always respect the allowable settings. + */ +static u32 pl08x_select_bus(struct pl08x_driver_data *pl08x, u8 src, u8 dst) +{ + u32 cctl = 0; + + if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) + cctl |= PL080_CONTROL_DST_AHB2; + if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) + cctl |= PL080_CONTROL_SRC_AHB2; + + return cctl; +} + +static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan, + unsigned long flags) +{ + struct pl08x_txd *txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT); + + if (txd) { + dma_async_tx_descriptor_init(&txd->tx, &plchan->chan); + txd->tx.flags = flags; + txd->tx.tx_submit = pl08x_tx_submit; + INIT_LIST_HEAD(&txd->node); + + /* Always enable error and terminal interrupts */ + txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK | + PL080_CONFIG_TC_IRQ_MASK; + } + return txd; +} + +/* * Initialize a descriptor to be used by memcpy submit */ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( @@ -1412,40 +1304,38 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( struct pl08x_txd *txd; int ret; - txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT); + txd = pl08x_get_txd(plchan, flags); if (!txd) { dev_err(&pl08x->adev->dev, "%s no memory for descriptor\n", __func__); return NULL; } - dma_async_tx_descriptor_init(&txd->tx, chan); txd->direction = DMA_NONE; - txd->srcbus.addr = src; - txd->dstbus.addr = dest; + txd->src_addr = src; + txd->dst_addr = dest; + txd->len = len; /* Set platform data for m2m */ - txd->cd = &pl08x->pd->memcpy_channel; + txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; + txd->cctl = pl08x->pd->memcpy_channel.cctl & + ~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2); + /* Both to be incremented or the code will break */ - txd->cd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; - txd->tx.tx_submit = pl08x_tx_submit; - txd->tx.callback = NULL; - txd->tx.callback_param = NULL; - txd->len = len; + txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; + + if (pl08x->vd->dualmaster) + txd->cctl |= pl08x_select_bus(pl08x, + pl08x->mem_buses, pl08x->mem_buses); - INIT_LIST_HEAD(&txd->node); ret = pl08x_prep_channel_resources(plchan, txd); if (ret) return NULL; - /* - * NB: the channel lock is held at this point so tx_submit() - * must be called in direct succession. - */ return &txd->tx; } -struct dma_async_tx_descriptor *pl08x_prep_slave_sg( +static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_data_direction direction, unsigned long flags) @@ -1453,6 +1343,7 @@ struct dma_async_tx_descriptor *pl08x_prep_slave_sg( struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; struct pl08x_txd *txd; + u8 src_buses, dst_buses; int ret; /* @@ -1467,14 +1358,12 @@ struct dma_async_tx_descriptor *pl08x_prep_slave_sg( dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n", __func__, sgl->length, plchan->name); - txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT); + txd = pl08x_get_txd(plchan, flags); if (!txd) { dev_err(&pl08x->adev->dev, "%s no txd\n", __func__); return NULL; } - dma_async_tx_descriptor_init(&txd->tx, chan); - if (direction != plchan->runtime_direction) dev_err(&pl08x->adev->dev, "%s DMA setup does not match " "the direction configured for the PrimeCell\n", @@ -1486,37 +1375,47 @@ struct dma_async_tx_descriptor *pl08x_prep_slave_sg( * channel target address dynamically at runtime. */ txd->direction = direction; + txd->len = sgl->length; + + txd->cctl = plchan->cd->cctl & + ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | + PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | + PL080_CONTROL_PROT_MASK); + + /* Access the cell in privileged mode, non-bufferable, non-cacheable */ + txd->cctl |= PL080_CONTROL_PROT_SYS; + if (direction == DMA_TO_DEVICE) { - txd->srcbus.addr = sgl->dma_address; + txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; + txd->cctl |= PL080_CONTROL_SRC_INCR; + txd->src_addr = sgl->dma_address; if (plchan->runtime_addr) - txd->dstbus.addr = plchan->runtime_addr; + txd->dst_addr = plchan->runtime_addr; else - txd->dstbus.addr = plchan->cd->addr; + txd->dst_addr = plchan->cd->addr; + src_buses = pl08x->mem_buses; + dst_buses = plchan->cd->periph_buses; } else if (direction == DMA_FROM_DEVICE) { + txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; + txd->cctl |= PL080_CONTROL_DST_INCR; if (plchan->runtime_addr) - txd->srcbus.addr = plchan->runtime_addr; + txd->src_addr = plchan->runtime_addr; else - txd->srcbus.addr = plchan->cd->addr; - txd->dstbus.addr = sgl->dma_address; + txd->src_addr = plchan->cd->addr; + txd->dst_addr = sgl->dma_address; + src_buses = plchan->cd->periph_buses; + dst_buses = pl08x->mem_buses; } else { dev_err(&pl08x->adev->dev, "%s direction unsupported\n", __func__); return NULL; } - txd->cd = plchan->cd; - txd->tx.tx_submit = pl08x_tx_submit; - txd->tx.callback = NULL; - txd->tx.callback_param = NULL; - txd->len = sgl->length; - INIT_LIST_HEAD(&txd->node); + + txd->cctl |= pl08x_select_bus(pl08x, src_buses, dst_buses); ret = pl08x_prep_channel_resources(plchan, txd); if (ret) return NULL; - /* - * NB: the channel lock is held at this point so tx_submit() - * must be called in direct succession. - */ return &txd->tx; } @@ -1531,10 +1430,8 @@ static int pl08x_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, /* Controls applicable to inactive channels */ if (cmd == DMA_SLAVE_CONFIG) { - dma_set_runtime_config(chan, - (struct dma_slave_config *) - arg); - return 0; + return dma_set_runtime_config(chan, + (struct dma_slave_config *)arg); } /* @@ -1558,16 +1455,8 @@ static int pl08x_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, * Mark physical channel as free and free any slave * signal */ - if ((plchan->phychan->signal >= 0) && - pl08x->pd->put_signal) { - pl08x->pd->put_signal(plchan); - plchan->phychan->signal = -1; - } - pl08x_put_phy_channel(pl08x, plchan->phychan); - plchan->phychan = NULL; + release_phy_channel(plchan); } - /* Stop any pending tasklet */ - tasklet_disable(&plchan->tasklet); /* Dequeue jobs and free LLIs */ if (plchan->at) { pl08x_free_txd(pl08x, plchan->at); @@ -1609,10 +1498,9 @@ bool pl08x_filter_id(struct dma_chan *chan, void *chan_id) /* * Just check that the device is there and active - * TODO: turn this bit on/off depending on the number of - * physical channels actually used, if it is zero... well - * shut it off. That will save some power. Cut the clock - * at the same time. + * TODO: turn this bit on/off depending on the number of physical channels + * actually used, if it is zero... well shut it off. That will save some + * power. Cut the clock at the same time. */ static void pl08x_ensure_on(struct pl08x_driver_data *pl08x) { @@ -1620,78 +1508,66 @@ static void pl08x_ensure_on(struct pl08x_driver_data *pl08x) val = readl(pl08x->base + PL080_CONFIG); val &= ~(PL080_CONFIG_M2_BE | PL080_CONFIG_M1_BE | PL080_CONFIG_ENABLE); - /* We implictly clear bit 1 and that means little-endian mode */ + /* We implicitly clear bit 1 and that means little-endian mode */ val |= PL080_CONFIG_ENABLE; writel(val, pl08x->base + PL080_CONFIG); } +static void pl08x_unmap_buffers(struct pl08x_txd *txd) +{ + struct device *dev = txd->tx.chan->device->dev; + + if (!(txd->tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(dev, txd->src_addr, txd->len, + DMA_TO_DEVICE); + else + dma_unmap_page(dev, txd->src_addr, txd->len, + DMA_TO_DEVICE); + } + if (!(txd->tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(dev, txd->dst_addr, txd->len, + DMA_FROM_DEVICE); + else + dma_unmap_page(dev, txd->dst_addr, txd->len, + DMA_FROM_DEVICE); + } +} + static void pl08x_tasklet(unsigned long data) { struct pl08x_dma_chan *plchan = (struct pl08x_dma_chan *) data; - struct pl08x_phy_chan *phychan = plchan->phychan; struct pl08x_driver_data *pl08x = plchan->host; + struct pl08x_txd *txd; + unsigned long flags; - if (!plchan) - BUG(); - - spin_lock(&plchan->lock); - - if (plchan->at) { - dma_async_tx_callback callback = - plchan->at->tx.callback; - void *callback_param = - plchan->at->tx.callback_param; - - /* - * Update last completed - */ - plchan->lc = - (plchan->at->tx.cookie); - - /* - * Callback to signal completion - */ - if (callback) - callback(callback_param); + spin_lock_irqsave(&plchan->lock, flags); - /* - * Device callbacks should NOT clear - * the current transaction on the channel - * Linus: sometimes they should? - */ - if (!plchan->at) - BUG(); + txd = plchan->at; + plchan->at = NULL; - /* - * Free the descriptor if it's not for a device - * using a circular buffer - */ - if (!plchan->at->cd->circular_buffer) { - pl08x_free_txd(pl08x, plchan->at); - plchan->at = NULL; - } - /* - * else descriptor for circular - * buffers only freed when - * client has disabled dma - */ + if (txd) { + /* Update last completed */ + plchan->lc = txd->tx.cookie; } - /* - * If a new descriptor is queued, set it up - * plchan->at is NULL here - */ - if (!list_empty(&plchan->desc_list)) { + + /* If a new descriptor is queued, set it up plchan->at is NULL here */ + if (!list_empty(&plchan->pend_list)) { struct pl08x_txd *next; - next = list_first_entry(&plchan->desc_list, + next = list_first_entry(&plchan->pend_list, struct pl08x_txd, node); list_del(&next->node); - plchan->at = next; - /* Configure the physical channel for the next txd */ - pl08x_config_phychan_for_txd(plchan); - pl08x_set_cregs(pl08x, plchan->phychan); - pl08x_enable_phy_chan(pl08x, plchan->phychan); + + pl08x_start_txd(plchan, next); + } else if (plchan->phychan_hold) { + /* + * This channel is still in use - we have a new txd being + * prepared and will soon be queued. Don't give up the + * physical channel. + */ } else { struct pl08x_dma_chan *waiting = NULL; @@ -1699,20 +1575,14 @@ static void pl08x_tasklet(unsigned long data) * No more jobs, so free up the physical channel * Free any allocated signal on slave transfers too */ - if ((phychan->signal >= 0) && pl08x->pd->put_signal) { - pl08x->pd->put_signal(plchan); - phychan->signal = -1; - } - pl08x_put_phy_channel(pl08x, phychan); - plchan->phychan = NULL; + release_phy_channel(plchan); plchan->state = PL08X_CHAN_IDLE; /* - * And NOW before anyone else can grab that free:d - * up physical channel, see if there is some memcpy - * pending that seriously needs to start because of - * being stacked up while we were choking the - * physical channels with data. + * And NOW before anyone else can grab that free:d up + * physical channel, see if there is some memcpy pending + * that seriously needs to start because of being stacked + * up while we were choking the physical channels with data. */ list_for_each_entry(waiting, &pl08x->memcpy.channels, chan.device_node) { @@ -1724,6 +1594,7 @@ static void pl08x_tasklet(unsigned long data) ret = prep_phy_channel(waiting, waiting->waiting); BUG_ON(ret); + waiting->phychan_hold--; waiting->state = PL08X_CHAN_RUNNING; waiting->waiting = NULL; pl08x_issue_pending(&waiting->chan); @@ -1732,7 +1603,25 @@ static void pl08x_tasklet(unsigned long data) } } - spin_unlock(&plchan->lock); + spin_unlock_irqrestore(&plchan->lock, flags); + + if (txd) { + dma_async_tx_callback callback = txd->tx.callback; + void *callback_param = txd->tx.callback_param; + + /* Don't try to unmap buffers on slave channels */ + if (!plchan->slave) + pl08x_unmap_buffers(txd); + + /* Free the descriptor */ + spin_lock_irqsave(&plchan->lock, flags); + pl08x_free_txd(pl08x, txd); + spin_unlock_irqrestore(&plchan->lock, flags); + + /* Callback to signal completion */ + if (callback) + callback(callback_param); + } } static irqreturn_t pl08x_irq(int irq, void *dev) @@ -1744,9 +1633,7 @@ static irqreturn_t pl08x_irq(int irq, void *dev) val = readl(pl08x->base + PL080_ERR_STATUS); if (val) { - /* - * An error interrupt (on one or more channels) - */ + /* An error interrupt (on one or more channels) */ dev_err(&pl08x->adev->dev, "%s error interrupt, register value 0x%08x\n", __func__, val); @@ -1770,9 +1657,7 @@ static irqreturn_t pl08x_irq(int irq, void *dev) mask |= (1 << i); } } - /* - * Clear only the terminal interrupts on channels we processed - */ + /* Clear only the terminal interrupts on channels we processed */ writel(mask, pl08x->base + PL080_TC_CLEAR); return mask ? IRQ_HANDLED : IRQ_NONE; @@ -1791,6 +1676,7 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, int i; INIT_LIST_HEAD(&dmadev->channels); + /* * Register as many many memcpy as we have physical channels, * we won't always be able to use all but the code will have @@ -1819,16 +1705,23 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, return -ENOMEM; } } + if (chan->cd->circular_buffer) { + dev_err(&pl08x->adev->dev, + "channel %s: circular buffers not supported\n", + chan->name); + kfree(chan); + continue; + } dev_info(&pl08x->adev->dev, "initialize virtual channel \"%s\"\n", chan->name); chan->chan.device = dmadev; - atomic_set(&chan->last_issued, 0); - chan->lc = atomic_read(&chan->last_issued); + chan->chan.cookie = 0; + chan->lc = 0; spin_lock_init(&chan->lock); - INIT_LIST_HEAD(&chan->desc_list); + INIT_LIST_HEAD(&chan->pend_list); tasklet_init(&chan->tasklet, pl08x_tasklet, (unsigned long) chan); @@ -1898,7 +1791,7 @@ static int pl08x_debugfs_show(struct seq_file *s, void *data) seq_printf(s, "CHANNEL:\tSTATE:\n"); seq_printf(s, "--------\t------\n"); list_for_each_entry(chan, &pl08x->memcpy.channels, chan.device_node) { - seq_printf(s, "%s\t\t\%s\n", chan->name, + seq_printf(s, "%s\t\t%s\n", chan->name, pl08x_state_str(chan->state)); } @@ -1906,7 +1799,7 @@ static int pl08x_debugfs_show(struct seq_file *s, void *data) seq_printf(s, "CHANNEL:\tSTATE:\n"); seq_printf(s, "--------\t------\n"); list_for_each_entry(chan, &pl08x->slave.channels, chan.device_node) { - seq_printf(s, "%s\t\t\%s\n", chan->name, + seq_printf(s, "%s\t\t%s\n", chan->name, pl08x_state_str(chan->state)); } @@ -1942,7 +1835,7 @@ static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) static int pl08x_probe(struct amba_device *adev, struct amba_id *id) { struct pl08x_driver_data *pl08x; - struct vendor_data *vd = id->data; + const struct vendor_data *vd = id->data; int ret = 0; int i; @@ -1990,6 +1883,14 @@ static int pl08x_probe(struct amba_device *adev, struct amba_id *id) pl08x->adev = adev; pl08x->vd = vd; + /* By default, AHB1 only. If dualmaster, from platform */ + pl08x->lli_buses = PL08X_AHB1; + pl08x->mem_buses = PL08X_AHB1; + if (pl08x->vd->dualmaster) { + pl08x->lli_buses = pl08x->pd->lli_buses; + pl08x->mem_buses = pl08x->pd->mem_buses; + } + /* A DMA memory pool for LLIs, align on 1-byte boundary */ pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev, PL08X_LLI_TSFR_SIZE, PL08X_ALIGN, 0); @@ -2009,14 +1910,12 @@ static int pl08x_probe(struct amba_device *adev, struct amba_id *id) /* Turn on the PL08x */ pl08x_ensure_on(pl08x); - /* - * Attach the interrupt handler - */ + /* Attach the interrupt handler */ writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR); writel(0x000000FF, pl08x->base + PL080_TC_CLEAR); ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED, - vd->name, pl08x); + DRIVER_NAME, pl08x); if (ret) { dev_err(&adev->dev, "%s failed to request interrupt %d\n", __func__, adev->irq[0]); @@ -2087,8 +1986,9 @@ static int pl08x_probe(struct amba_device *adev, struct amba_id *id) amba_set_drvdata(adev, pl08x); init_pl08x_debugfs(pl08x); - dev_info(&pl08x->adev->dev, "ARM(R) %s DMA block initialized @%08x\n", - vd->name, adev->res.start); + dev_info(&pl08x->adev->dev, "DMA: PL%03x rev%u at 0x%08llx irq %d\n", + amba_part(adev), amba_rev(adev), + (unsigned long long)adev->res.start, adev->irq[0]); return 0; out_no_slave_reg: @@ -2115,13 +2015,11 @@ out_no_pl08x: /* PL080 has 8 channels and the PL080 have just 2 */ static struct vendor_data vendor_pl080 = { - .name = "PL080", .channels = 8, .dualmaster = true, }; static struct vendor_data vendor_pl081 = { - .name = "PL081", .channels = 2, .dualmaster = false, }; @@ -2160,7 +2058,7 @@ static int __init pl08x_init(void) retval = amba_driver_register(&pl08x_amba_driver); if (retval) printk(KERN_WARNING DRIVER_NAME - "failed to register as an amba device (%d)\n", + "failed to register as an AMBA device (%d)\n", retval); return retval; } diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index ea0ee81cff53..3d7d705f026f 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -253,7 +253,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) /* move myself to free_list */ list_move(&desc->desc_node, &atchan->free_list); - /* unmap dma addresses */ + /* unmap dma addresses (not on slave channels) */ if (!atchan->chan_common.private) { struct device *parent = chan2parent(&atchan->chan_common); if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { @@ -583,7 +583,6 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, desc->lli.ctrlb = ctrlb; desc->txd.cookie = 0; - async_tx_ack(&desc->txd); if (!first) { first = desc; @@ -604,7 +603,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, /* set end-of-link to the last link descriptor of list*/ set_desc_eol(desc); - desc->txd.flags = flags; /* client is in control of this ack */ + first->txd.flags = flags; /* client is in control of this ack */ return &first->txd; @@ -670,7 +669,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (!desc) goto err_desc_get; - mem = sg_phys(sg); + mem = sg_dma_address(sg); len = sg_dma_len(sg); mem_width = 2; if (unlikely(mem & 3 || len & 3)) @@ -712,7 +711,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (!desc) goto err_desc_get; - mem = sg_phys(sg); + mem = sg_dma_address(sg); len = sg_dma_len(sg); mem_width = 2; if (unlikely(mem & 3 || len & 3)) @@ -749,8 +748,8 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, first->txd.cookie = -EBUSY; first->len = total_len; - /* last link descriptor of list is responsible of flags */ - prev->txd.flags = flags; /* client is in control of this ack */ + /* first link descriptor of list is responsible of flags */ + first->txd.flags = flags; /* client is in control of this ack */ return &first->txd; @@ -854,11 +853,11 @@ static void atc_issue_pending(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "issue_pending\n"); + spin_lock_bh(&atchan->lock); if (!atc_chan_is_enabled(atchan)) { - spin_lock_bh(&atchan->lock); atc_advance_work(atchan); - spin_unlock_bh(&atchan->lock); } + spin_unlock_bh(&atchan->lock); } /** @@ -1210,7 +1209,7 @@ static int __init at_dma_init(void) { return platform_driver_probe(&at_dma_driver, at_dma_probe); } -module_init(at_dma_init); +subsys_initcall(at_dma_init); static void __exit at_dma_exit(void) { diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index e5e172d21692..4de947a450fc 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -1,7 +1,7 @@ /* * Freescale MPC85xx, MPC83xx DMA Engine support * - * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved. * * Author: * Zhang Wei <wei.zhang@freescale.com>, Jul 2007 @@ -1324,6 +1324,8 @@ static int __devinit fsldma_of_probe(struct platform_device *op, fdev->common.device_control = fsl_dma_device_control; fdev->common.dev = &op->dev; + dma_set_mask(&(op->dev), DMA_BIT_MASK(36)); + dev_set_drvdata(&op->dev, fdev); /* diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index 78266382797e..798f46a4590d 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -664,11 +664,20 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy( /*calculate CTL_LO*/ ctl_lo.ctl_lo = 0; ctl_lo.ctlx.int_en = 1; - ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width; - ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width; ctl_lo.ctlx.dst_msize = mids->dma_slave.src_maxburst; ctl_lo.ctlx.src_msize = mids->dma_slave.dst_maxburst; + /* + * Here we need some translation from "enum dma_slave_buswidth" + * to the format for our dma controller + * standard intel_mid_dmac's format + * 1 Byte 0b000 + * 2 Bytes 0b001 + * 4 Bytes 0b010 + */ + ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width / 2; + ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width / 2; + if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) { ctl_lo.ctlx.tt_fc = 0; ctl_lo.ctlx.sinc = 0; @@ -746,8 +755,18 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg( BUG_ON(!mids); if (!midc->dma->pimr_mask) { - pr_debug("MDMA: SG list is not supported by this controller\n"); - return NULL; + /* We can still handle sg list with only one item */ + if (sg_len == 1) { + txd = intel_mid_dma_prep_memcpy(chan, + mids->dma_slave.dst_addr, + mids->dma_slave.src_addr, + sgl->length, + flags); + return txd; + } else { + pr_warn("MDMA: SG list is not supported by this controller\n"); + return NULL; + } } pr_debug("MDMA: SG Length = %d, direction = %d, Flags = %#lx\n", @@ -758,6 +777,7 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg( pr_err("MDMA: Prep memcpy failed\n"); return NULL; } + desc = to_intel_mid_dma_desc(txd); desc->dirn = direction; ctl_lo.ctl_lo = desc->ctl_lo; @@ -1021,11 +1041,6 @@ static irqreturn_t intel_mid_dma_interrupt(int irq, void *data) /*DMA Interrupt*/ pr_debug("MDMA:Got an interrupt on irq %d\n", irq); - if (!mid) { - pr_err("ERR_MDMA:null pointer mid\n"); - return -EINVAL; - } - pr_debug("MDMA: Status %x, Mask %x\n", tfr_status, mid->intr_mask); tfr_status &= mid->intr_mask; if (tfr_status) { diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 161c452923b8..c6b01f535b29 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1261,7 +1261,7 @@ out: return err; } -#ifdef CONFIG_MD_RAID6_PQ +#ifdef CONFIG_RAID6_PQ static int __devinit iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) { @@ -1584,7 +1584,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) && dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) { - #ifdef CONFIG_MD_RAID6_PQ + #ifdef CONFIG_RAID6_PQ ret = iop_adma_pq_zero_sum_self_test(adev); dev_dbg(&pdev->dev, "pq self test returned %d\n", ret); #else diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index c064c89420d0..1c38418ae61f 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -1,6 +1,7 @@ /* * Topcliff PCH DMA controller driver * Copyright (c) 2010 Intel Corporation + * Copyright (C) 2011 OKI SEMICONDUCTOR CO., LTD. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -921,12 +922,19 @@ static void __devexit pch_dma_remove(struct pci_dev *pdev) } /* PCI Device ID of DMA device */ -#define PCI_DEVICE_ID_PCH_DMA_8CH 0x8810 -#define PCI_DEVICE_ID_PCH_DMA_4CH 0x8815 +#define PCI_VENDOR_ID_ROHM 0x10DB +#define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH 0x8810 +#define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH 0x8815 +#define PCI_DEVICE_ID_ML7213_DMA1_8CH 0x8026 +#define PCI_DEVICE_ID_ML7213_DMA2_8CH 0x802B +#define PCI_DEVICE_ID_ML7213_DMA3_4CH 0x8034 static const struct pci_device_id pch_dma_id_table[] = { - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH_DMA_8CH), 8 }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH_DMA_4CH), 4 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */ { 0, }, }; @@ -954,6 +962,7 @@ static void __exit pch_dma_exit(void) module_init(pch_dma_init); module_exit(pch_dma_exit); -MODULE_DESCRIPTION("Topcliff PCH DMA controller driver"); +MODULE_DESCRIPTION("Intel EG20T PCH / OKI SEMICONDUCTOR ML7213 IOH " + "DMA controller driver"); MODULE_AUTHOR("Yong Wang <yong.y.wang@intel.com>"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index fab68a553205..6e1d46a65d0e 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1,5 +1,6 @@ /* - * Copyright (C) ST-Ericsson SA 2007-2010 + * Copyright (C) Ericsson AB 2007-2008 + * Copyright (C) ST-Ericsson SA 2008-2010 * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson * License terms: GNU General Public License (GPL) version 2 @@ -554,8 +555,66 @@ static struct d40_desc *d40_last_queued(struct d40_chan *d40c) return d; } -/* Support functions for logical channels */ +static int d40_psize_2_burst_size(bool is_log, int psize) +{ + if (is_log) { + if (psize == STEDMA40_PSIZE_LOG_1) + return 1; + } else { + if (psize == STEDMA40_PSIZE_PHY_1) + return 1; + } + + return 2 << psize; +} + +/* + * The dma only supports transmitting packages up to + * STEDMA40_MAX_SEG_SIZE << data_width. Calculate the total number of + * dma elements required to send the entire sg list + */ +static int d40_size_2_dmalen(int size, u32 data_width1, u32 data_width2) +{ + int dmalen; + u32 max_w = max(data_width1, data_width2); + u32 min_w = min(data_width1, data_width2); + u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE << min_w, 1 << max_w); + + if (seg_max > STEDMA40_MAX_SEG_SIZE) + seg_max -= (1 << max_w); + + if (!IS_ALIGNED(size, 1 << max_w)) + return -EINVAL; + + if (size <= seg_max) + dmalen = 1; + else { + dmalen = size / seg_max; + if (dmalen * seg_max < size) + dmalen++; + } + return dmalen; +} + +static int d40_sg_2_dmalen(struct scatterlist *sgl, int sg_len, + u32 data_width1, u32 data_width2) +{ + struct scatterlist *sg; + int i; + int len = 0; + int ret; + + for_each_sg(sgl, sg, sg_len, i) { + ret = d40_size_2_dmalen(sg_dma_len(sg), + data_width1, data_width2); + if (ret < 0) + return ret; + len += ret; + } + return len; +} +/* Support functions for logical channels */ static int d40_channel_execute_command(struct d40_chan *d40c, enum d40_command command) @@ -1241,6 +1300,21 @@ static int d40_validate_conf(struct d40_chan *d40c, res = -EINVAL; } + if (d40_psize_2_burst_size(is_log, conf->src_info.psize) * + (1 << conf->src_info.data_width) != + d40_psize_2_burst_size(is_log, conf->dst_info.psize) * + (1 << conf->dst_info.data_width)) { + /* + * The DMAC hardware only supports + * src (burst x width) == dst (burst x width) + */ + + dev_err(&d40c->chan.dev->device, + "[%s] src (burst x width) != dst (burst x width)\n", + __func__); + res = -EINVAL; + } + return res; } @@ -1638,13 +1712,21 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, if (d40d == NULL) goto err; - d40d->lli_len = sgl_len; + d40d->lli_len = d40_sg_2_dmalen(sgl_dst, sgl_len, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width); + if (d40d->lli_len < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Unaligned size\n", __func__); + goto err; + } + d40d->lli_current = 0; d40d->txd.flags = dma_flags; if (d40c->log_num != D40_PHY_CHAN) { - if (d40_pool_lli_alloc(d40d, sgl_len, true) < 0) { + if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { dev_err(&d40c->chan.dev->device, "[%s] Out of memory\n", __func__); goto err; @@ -1654,15 +1736,17 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, sgl_len, d40d->lli_log.src, d40c->log_def.lcsp1, - d40c->dma_cfg.src_info.data_width); + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width); (void) d40_log_sg_to_lli(sgl_dst, sgl_len, d40d->lli_log.dst, d40c->log_def.lcsp3, - d40c->dma_cfg.dst_info.data_width); + d40c->dma_cfg.dst_info.data_width, + d40c->dma_cfg.src_info.data_width); } else { - if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) { + if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) { dev_err(&d40c->chan.dev->device, "[%s] Out of memory\n", __func__); goto err; @@ -1675,6 +1759,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, virt_to_phys(d40d->lli_phy.src), d40c->src_def_cfg, d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width, d40c->dma_cfg.src_info.psize); if (res < 0) @@ -1687,6 +1772,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, virt_to_phys(d40d->lli_phy.dst), d40c->dst_def_cfg, d40c->dma_cfg.dst_info.data_width, + d40c->dma_cfg.src_info.data_width, d40c->dma_cfg.dst_info.psize); if (res < 0) @@ -1826,7 +1912,6 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); unsigned long flags; - int err = 0; if (d40c->phy_chan == NULL) { dev_err(&d40c->chan.dev->device, @@ -1844,6 +1929,15 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, } d40d->txd.flags = dma_flags; + d40d->lli_len = d40_size_2_dmalen(size, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width); + if (d40d->lli_len < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Unaligned size\n", __func__); + goto err; + } + dma_async_tx_descriptor_init(&d40d->txd, chan); @@ -1851,37 +1945,40 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, if (d40c->log_num != D40_PHY_CHAN) { - if (d40_pool_lli_alloc(d40d, 1, true) < 0) { + if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { dev_err(&d40c->chan.dev->device, "[%s] Out of memory\n", __func__); goto err; } - d40d->lli_len = 1; d40d->lli_current = 0; - d40_log_fill_lli(d40d->lli_log.src, - src, - size, - d40c->log_def.lcsp1, - d40c->dma_cfg.src_info.data_width, - true); + if (d40_log_buf_to_lli(d40d->lli_log.src, + src, + size, + d40c->log_def.lcsp1, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width, + true) == NULL) + goto err; - d40_log_fill_lli(d40d->lli_log.dst, - dst, - size, - d40c->log_def.lcsp3, - d40c->dma_cfg.dst_info.data_width, - true); + if (d40_log_buf_to_lli(d40d->lli_log.dst, + dst, + size, + d40c->log_def.lcsp3, + d40c->dma_cfg.dst_info.data_width, + d40c->dma_cfg.src_info.data_width, + true) == NULL) + goto err; } else { - if (d40_pool_lli_alloc(d40d, 1, false) < 0) { + if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) { dev_err(&d40c->chan.dev->device, "[%s] Out of memory\n", __func__); goto err; } - err = d40_phy_fill_lli(d40d->lli_phy.src, + if (d40_phy_buf_to_lli(d40d->lli_phy.src, src, size, d40c->dma_cfg.src_info.psize, @@ -1889,11 +1986,11 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, d40c->src_def_cfg, true, d40c->dma_cfg.src_info.data_width, - false); - if (err) - goto err_fill_lli; + d40c->dma_cfg.dst_info.data_width, + false) == NULL) + goto err; - err = d40_phy_fill_lli(d40d->lli_phy.dst, + if (d40_phy_buf_to_lli(d40d->lli_phy.dst, dst, size, d40c->dma_cfg.dst_info.psize, @@ -1901,10 +1998,9 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, d40c->dst_def_cfg, true, d40c->dma_cfg.dst_info.data_width, - false); - - if (err) - goto err_fill_lli; + d40c->dma_cfg.src_info.data_width, + false) == NULL) + goto err; (void) dma_map_single(d40c->base->dev, d40d->lli_phy.src, d40d->lli_pool.size, DMA_TO_DEVICE); @@ -1913,9 +2009,6 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, spin_unlock_irqrestore(&d40c->lock, flags); return &d40d->txd; -err_fill_lli: - dev_err(&d40c->chan.dev->device, - "[%s] Failed filling in PHY LLI\n", __func__); err: if (d40d) d40_desc_free(d40c, d40d); @@ -1945,13 +2038,21 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, dma_addr_t dev_addr = 0; int total_size; - if (d40_pool_lli_alloc(d40d, sg_len, true) < 0) { + d40d->lli_len = d40_sg_2_dmalen(sgl, sg_len, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width); + if (d40d->lli_len < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Unaligned size\n", __func__); + return -EINVAL; + } + + if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { dev_err(&d40c->chan.dev->device, "[%s] Out of memory\n", __func__); return -ENOMEM; } - d40d->lli_len = sg_len; d40d->lli_current = 0; if (direction == DMA_FROM_DEVICE) @@ -1993,13 +2094,21 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, dma_addr_t dst_dev_addr; int res; - if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) { + d40d->lli_len = d40_sg_2_dmalen(sgl, sgl_len, + d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width); + if (d40d->lli_len < 0) { + dev_err(&d40c->chan.dev->device, + "[%s] Unaligned size\n", __func__); + return -EINVAL; + } + + if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) { dev_err(&d40c->chan.dev->device, "[%s] Out of memory\n", __func__); return -ENOMEM; } - d40d->lli_len = sgl_len; d40d->lli_current = 0; if (direction == DMA_FROM_DEVICE) { @@ -2024,6 +2133,7 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, virt_to_phys(d40d->lli_phy.src), d40c->src_def_cfg, d40c->dma_cfg.src_info.data_width, + d40c->dma_cfg.dst_info.data_width, d40c->dma_cfg.src_info.psize); if (res < 0) return res; @@ -2035,6 +2145,7 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, virt_to_phys(d40d->lli_phy.dst), d40c->dst_def_cfg, d40c->dma_cfg.dst_info.data_width, + d40c->dma_cfg.src_info.data_width, d40c->dma_cfg.dst_info.psize); if (res < 0) return res; @@ -2244,6 +2355,8 @@ static void d40_set_runtime_config(struct dma_chan *chan, psize = STEDMA40_PSIZE_PHY_8; else if (config_maxburst >= 4) psize = STEDMA40_PSIZE_PHY_4; + else if (config_maxburst >= 2) + psize = STEDMA40_PSIZE_PHY_2; else psize = STEDMA40_PSIZE_PHY_1; } diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 8557cb88b255..0b096a38322d 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -1,6 +1,6 @@ /* * Copyright (C) ST-Ericsson SA 2007-2010 - * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson + * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson * License terms: GNU General Public License (GPL) version 2 */ @@ -122,15 +122,15 @@ void d40_phy_cfg(struct stedma40_chan_cfg *cfg, *dst_cfg = dst; } -int d40_phy_fill_lli(struct d40_phy_lli *lli, - dma_addr_t data, - u32 data_size, - int psize, - dma_addr_t next_lli, - u32 reg_cfg, - bool term_int, - u32 data_width, - bool is_device) +static int d40_phy_fill_lli(struct d40_phy_lli *lli, + dma_addr_t data, + u32 data_size, + int psize, + dma_addr_t next_lli, + u32 reg_cfg, + bool term_int, + u32 data_width, + bool is_device) { int num_elems; @@ -139,13 +139,6 @@ int d40_phy_fill_lli(struct d40_phy_lli *lli, else num_elems = 2 << psize; - /* - * Size is 16bit. data_width is 8, 16, 32 or 64 bit - * Block large than 64 KiB must be split. - */ - if (data_size > (0xffff << data_width)) - return -EINVAL; - /* Must be aligned */ if (!IS_ALIGNED(data, 0x1 << data_width)) return -EINVAL; @@ -187,55 +180,118 @@ int d40_phy_fill_lli(struct d40_phy_lli *lli, return 0; } +static int d40_seg_size(int size, int data_width1, int data_width2) +{ + u32 max_w = max(data_width1, data_width2); + u32 min_w = min(data_width1, data_width2); + u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE << min_w, 1 << max_w); + + if (seg_max > STEDMA40_MAX_SEG_SIZE) + seg_max -= (1 << max_w); + + if (size <= seg_max) + return size; + + if (size <= 2 * seg_max) + return ALIGN(size / 2, 1 << max_w); + + return seg_max; +} + +struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli, + dma_addr_t addr, + u32 size, + int psize, + dma_addr_t lli_phys, + u32 reg_cfg, + bool term_int, + u32 data_width1, + u32 data_width2, + bool is_device) +{ + int err; + dma_addr_t next = lli_phys; + int size_rest = size; + int size_seg = 0; + + do { + size_seg = d40_seg_size(size_rest, data_width1, data_width2); + size_rest -= size_seg; + + if (term_int && size_rest == 0) + next = 0; + else + next = ALIGN(next + sizeof(struct d40_phy_lli), + D40_LLI_ALIGN); + + err = d40_phy_fill_lli(lli, + addr, + size_seg, + psize, + next, + reg_cfg, + !next, + data_width1, + is_device); + + if (err) + goto err; + + lli++; + if (!is_device) + addr += size_seg; + } while (size_rest); + + return lli; + + err: + return NULL; +} + int d40_phy_sg_to_lli(struct scatterlist *sg, int sg_len, dma_addr_t target, - struct d40_phy_lli *lli, + struct d40_phy_lli *lli_sg, dma_addr_t lli_phys, u32 reg_cfg, - u32 data_width, + u32 data_width1, + u32 data_width2, int psize) { int total_size = 0; int i; struct scatterlist *current_sg = sg; - dma_addr_t next_lli_phys; dma_addr_t dst; - int err = 0; + struct d40_phy_lli *lli = lli_sg; + dma_addr_t l_phys = lli_phys; for_each_sg(sg, current_sg, sg_len, i) { total_size += sg_dma_len(current_sg); - /* If this scatter list entry is the last one, no next link */ - if (sg_len - 1 == i) - next_lli_phys = 0; - else - next_lli_phys = ALIGN(lli_phys + (i + 1) * - sizeof(struct d40_phy_lli), - D40_LLI_ALIGN); - if (target) dst = target; else dst = sg_phys(current_sg); - err = d40_phy_fill_lli(&lli[i], - dst, - sg_dma_len(current_sg), - psize, - next_lli_phys, - reg_cfg, - !next_lli_phys, - data_width, - target == dst); - if (err) - goto err; + l_phys = ALIGN(lli_phys + (lli - lli_sg) * + sizeof(struct d40_phy_lli), D40_LLI_ALIGN); + + lli = d40_phy_buf_to_lli(lli, + dst, + sg_dma_len(current_sg), + psize, + l_phys, + reg_cfg, + sg_len - 1 == i, + data_width1, + data_width2, + target == dst); + if (lli == NULL) + return -EINVAL; } return total_size; -err: - return err; } @@ -315,17 +371,20 @@ void d40_log_lli_lcla_write(struct d40_log_lli *lcla, writel(lli_dst->lcsp13, &lcla[1].lcsp13); } -void d40_log_fill_lli(struct d40_log_lli *lli, - dma_addr_t data, u32 data_size, - u32 reg_cfg, - u32 data_width, - bool addr_inc) +static void d40_log_fill_lli(struct d40_log_lli *lli, + dma_addr_t data, u32 data_size, + u32 reg_cfg, + u32 data_width, + bool addr_inc) { lli->lcsp13 = reg_cfg; /* The number of elements to transfer */ lli->lcsp02 = ((data_size >> data_width) << D40_MEM_LCSP0_ECNT_POS) & D40_MEM_LCSP0_ECNT_MASK; + + BUG_ON((data_size >> data_width) > STEDMA40_MAX_SEG_SIZE); + /* 16 LSBs address of the current element */ lli->lcsp02 |= data & D40_MEM_LCSP0_SPTR_MASK; /* 16 MSBs address of the current element */ @@ -348,55 +407,94 @@ int d40_log_sg_to_dev(struct scatterlist *sg, int total_size = 0; struct scatterlist *current_sg = sg; int i; + struct d40_log_lli *lli_src = lli->src; + struct d40_log_lli *lli_dst = lli->dst; for_each_sg(sg, current_sg, sg_len, i) { total_size += sg_dma_len(current_sg); if (direction == DMA_TO_DEVICE) { - d40_log_fill_lli(&lli->src[i], - sg_phys(current_sg), - sg_dma_len(current_sg), - lcsp->lcsp1, src_data_width, - true); - d40_log_fill_lli(&lli->dst[i], - dev_addr, - sg_dma_len(current_sg), - lcsp->lcsp3, dst_data_width, - false); + lli_src = + d40_log_buf_to_lli(lli_src, + sg_phys(current_sg), + sg_dma_len(current_sg), + lcsp->lcsp1, src_data_width, + dst_data_width, + true); + lli_dst = + d40_log_buf_to_lli(lli_dst, + dev_addr, + sg_dma_len(current_sg), + lcsp->lcsp3, dst_data_width, + src_data_width, + false); } else { - d40_log_fill_lli(&lli->dst[i], - sg_phys(current_sg), - sg_dma_len(current_sg), - lcsp->lcsp3, dst_data_width, - true); - d40_log_fill_lli(&lli->src[i], - dev_addr, - sg_dma_len(current_sg), - lcsp->lcsp1, src_data_width, - false); + lli_dst = + d40_log_buf_to_lli(lli_dst, + sg_phys(current_sg), + sg_dma_len(current_sg), + lcsp->lcsp3, dst_data_width, + src_data_width, + true); + lli_src = + d40_log_buf_to_lli(lli_src, + dev_addr, + sg_dma_len(current_sg), + lcsp->lcsp1, src_data_width, + dst_data_width, + false); } } return total_size; } +struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, + dma_addr_t addr, + int size, + u32 lcsp13, /* src or dst*/ + u32 data_width1, + u32 data_width2, + bool addr_inc) +{ + struct d40_log_lli *lli = lli_sg; + int size_rest = size; + int size_seg = 0; + + do { + size_seg = d40_seg_size(size_rest, data_width1, data_width2); + size_rest -= size_seg; + + d40_log_fill_lli(lli, + addr, + size_seg, + lcsp13, data_width1, + addr_inc); + if (addr_inc) + addr += size_seg; + lli++; + } while (size_rest); + + return lli; +} + int d40_log_sg_to_lli(struct scatterlist *sg, int sg_len, struct d40_log_lli *lli_sg, u32 lcsp13, /* src or dst*/ - u32 data_width) + u32 data_width1, u32 data_width2) { int total_size = 0; struct scatterlist *current_sg = sg; int i; + struct d40_log_lli *lli = lli_sg; for_each_sg(sg, current_sg, sg_len, i) { total_size += sg_dma_len(current_sg); - - d40_log_fill_lli(&lli_sg[i], - sg_phys(current_sg), - sg_dma_len(current_sg), - lcsp13, data_width, - true); + lli = d40_log_buf_to_lli(lli, + sg_phys(current_sg), + sg_dma_len(current_sg), + lcsp13, + data_width1, data_width2, true); } return total_size; } diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 9e419b907544..9cc43495bea2 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -292,18 +292,20 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, struct d40_phy_lli *lli, dma_addr_t lli_phys, u32 reg_cfg, - u32 data_width, + u32 data_width1, + u32 data_width2, int psize); -int d40_phy_fill_lli(struct d40_phy_lli *lli, - dma_addr_t data, - u32 data_size, - int psize, - dma_addr_t next_lli, - u32 reg_cfg, - bool term_int, - u32 data_width, - bool is_device); +struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli, + dma_addr_t data, + u32 data_size, + int psize, + dma_addr_t next_lli, + u32 reg_cfg, + bool term_int, + u32 data_width1, + u32 data_width2, + bool is_device); void d40_phy_lli_write(void __iomem *virtbase, u32 phy_chan_num, @@ -312,12 +314,12 @@ void d40_phy_lli_write(void __iomem *virtbase, /* Logical channels */ -void d40_log_fill_lli(struct d40_log_lli *lli, - dma_addr_t data, - u32 data_size, - u32 reg_cfg, - u32 data_width, - bool addr_inc); +struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, + dma_addr_t addr, + int size, + u32 lcsp13, /* src or dst*/ + u32 data_width1, u32 data_width2, + bool addr_inc); int d40_log_sg_to_dev(struct scatterlist *sg, int sg_len, @@ -332,7 +334,7 @@ int d40_log_sg_to_lli(struct scatterlist *sg, int sg_len, struct d40_log_lli *lli_sg, u32 lcsp13, /* src or dst*/ - u32 data_width); + u32 data_width1, u32 data_width2); void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, struct d40_log_lli *lli_dst, diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 46e32573b3a3..01bffc4412d2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -160,6 +160,7 @@ enum nouveau_flags { #define NVOBJ_FLAG_ZERO_ALLOC (1 << 1) #define NVOBJ_FLAG_ZERO_FREE (1 << 2) #define NVOBJ_FLAG_VM (1 << 3) +#define NVOBJ_FLAG_VM_USER (1 << 4) #define NVOBJ_CINST_GLOBAL 0xdeadbeef @@ -1576,6 +1577,20 @@ nv_match_device(struct drm_device *dev, unsigned device, dev->pdev->subsystem_device == sub_device; } +/* returns 1 if device is one of the nv4x using the 0x4497 object class, + * helpful to determine a number of other hardware features + */ +static inline int +nv44_graph_class(struct drm_device *dev) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + + if ((dev_priv->chipset & 0xf0) == 0x60) + return 1; + + return !(0x0baf & (1 << (dev_priv->chipset & 0x0f))); +} + /* memory type/access flags, do not match hardware values */ #define NV_MEM_ACCESS_RO 1 #define NV_MEM_ACCESS_WO 2 diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 6d56a54b6e2e..60769d2f9a66 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -352,8 +352,8 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev, FBINFO_HWACCEL_IMAGEBLIT; info->flags |= FBINFO_CAN_FORCE_OUTPUT; info->fbops = &nouveau_fbcon_sw_ops; - info->fix.smem_start = dev->mode_config.fb_base + - (nvbo->bo.mem.start << PAGE_SHIFT); + info->fix.smem_start = nvbo->bo.mem.bus.base + + nvbo->bo.mem.bus.offset; info->fix.smem_len = size; info->screen_base = nvbo_kmap_obj_iovirtual(nouveau_fb->nvbo); diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 69044eb104bb..26347b7cd872 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -742,30 +742,24 @@ nouveau_vram_manager_debug(struct ttm_mem_type_manager *man, const char *prefix) { struct nouveau_mm *mm = man->priv; struct nouveau_mm_node *r; - u64 total = 0, ttotal[3] = {}, tused[3] = {}, tfree[3] = {}; - int i; + u32 total = 0, free = 0; mutex_lock(&mm->mutex); list_for_each_entry(r, &mm->nodes, nl_entry) { - printk(KERN_DEBUG "%s %s-%d: 0x%010llx 0x%010llx\n", - prefix, r->free ? "free" : "used", r->type, - ((u64)r->offset << 12), + printk(KERN_DEBUG "%s %d: 0x%010llx 0x%010llx\n", + prefix, r->type, ((u64)r->offset << 12), (((u64)r->offset + r->length) << 12)); + total += r->length; - ttotal[r->type] += r->length; - if (r->free) - tfree[r->type] += r->length; - else - tused[r->type] += r->length; + if (!r->type) + free += r->length; } mutex_unlock(&mm->mutex); - printk(KERN_DEBUG "%s total: 0x%010llx\n", prefix, total << 12); - for (i = 0; i < 3; i++) { - printk(KERN_DEBUG "%s type %d: 0x%010llx, " - "used 0x%010llx, free 0x%010llx\n", prefix, - i, ttotal[i] << 12, tused[i] << 12, tfree[i] << 12); - } + printk(KERN_DEBUG "%s total: 0x%010llx free: 0x%010llx\n", + prefix, (u64)total << 12, (u64)free << 12); + printk(KERN_DEBUG "%s block: 0x%08x\n", + prefix, mm->block_size << 12); } const struct ttm_mem_type_manager_func nouveau_vram_manager = { diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.c b/drivers/gpu/drm/nouveau/nouveau_mm.c index cdbb11eb701b..8844b50c3e54 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mm.c +++ b/drivers/gpu/drm/nouveau/nouveau_mm.c @@ -48,175 +48,76 @@ region_split(struct nouveau_mm *rmm, struct nouveau_mm_node *a, u32 size) b->offset = a->offset; b->length = size; - b->free = a->free; b->type = a->type; a->offset += size; a->length -= size; list_add_tail(&b->nl_entry, &a->nl_entry); - if (b->free) + if (b->type == 0) list_add_tail(&b->fl_entry, &a->fl_entry); return b; } -static struct nouveau_mm_node * -nouveau_mm_merge(struct nouveau_mm *rmm, struct nouveau_mm_node *this) -{ - struct nouveau_mm_node *prev, *next; - - /* try to merge with free adjacent entries of same type */ - prev = list_entry(this->nl_entry.prev, struct nouveau_mm_node, nl_entry); - if (this->nl_entry.prev != &rmm->nodes) { - if (prev->free && prev->type == this->type) { - prev->length += this->length; - region_put(rmm, this); - this = prev; - } - } - - next = list_entry(this->nl_entry.next, struct nouveau_mm_node, nl_entry); - if (this->nl_entry.next != &rmm->nodes) { - if (next->free && next->type == this->type) { - next->offset = this->offset; - next->length += this->length; - region_put(rmm, this); - this = next; - } - } - - return this; -} +#define node(root, dir) ((root)->nl_entry.dir == &rmm->nodes) ? NULL : \ + list_entry((root)->nl_entry.dir, struct nouveau_mm_node, nl_entry) void nouveau_mm_put(struct nouveau_mm *rmm, struct nouveau_mm_node *this) { - u32 block_s, block_l; + struct nouveau_mm_node *prev = node(this, prev); + struct nouveau_mm_node *next = node(this, next); - this->free = true; list_add(&this->fl_entry, &rmm->free); - this = nouveau_mm_merge(rmm, this); - - /* any entirely free blocks now? we'll want to remove typing - * on them now so they can be use for any memory allocation - */ - block_s = roundup(this->offset, rmm->block_size); - if (block_s + rmm->block_size > this->offset + this->length) - return; + this->type = 0; - /* split off any still-typed region at the start */ - if (block_s != this->offset) { - if (!region_split(rmm, this, block_s - this->offset)) - return; + if (prev && prev->type == 0) { + prev->length += this->length; + region_put(rmm, this); + this = prev; } - /* split off the soon-to-be-untyped block(s) */ - block_l = rounddown(this->length, rmm->block_size); - if (block_l != this->length) { - this = region_split(rmm, this, block_l); - if (!this) - return; + if (next && next->type == 0) { + next->offset = this->offset; + next->length += this->length; + region_put(rmm, this); } - - /* mark as having no type, and retry merge with any adjacent - * untyped blocks - */ - this->type = 0; - nouveau_mm_merge(rmm, this); } int nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc, u32 align, struct nouveau_mm_node **pnode) { - struct nouveau_mm_node *this, *tmp, *next; - u32 splitoff, avail, alloc; - - list_for_each_entry_safe(this, tmp, &rmm->free, fl_entry) { - next = list_entry(this->nl_entry.next, struct nouveau_mm_node, nl_entry); - if (this->nl_entry.next == &rmm->nodes) - next = NULL; - - /* skip wrongly typed blocks */ - if (this->type && this->type != type) + struct nouveau_mm_node *prev, *this, *next; + u32 min = size_nc ? size_nc : size; + u32 align_mask = align - 1; + u32 splitoff; + u32 s, e; + + list_for_each_entry(this, &rmm->free, fl_entry) { + e = this->offset + this->length; + s = this->offset; + + prev = node(this, prev); + if (prev && prev->type != type) + s = roundup(s, rmm->block_size); + + next = node(this, next); + if (next && next->type != type) + e = rounddown(e, rmm->block_size); + + s = (s + align_mask) & ~align_mask; + e &= ~align_mask; + if (s > e || e - s < min) continue; - /* account for alignment */ - splitoff = this->offset & (align - 1); - if (splitoff) - splitoff = align - splitoff; - - if (this->length <= splitoff) - continue; - - /* determine total memory available from this, and - * the next block (if appropriate) - */ - avail = this->length; - if (next && next->free && (!next->type || next->type == type)) - avail += next->length; - - avail -= splitoff; - - /* determine allocation size */ - if (size_nc) { - alloc = min(avail, size); - alloc = rounddown(alloc, size_nc); - if (alloc == 0) - continue; - } else { - alloc = size; - if (avail < alloc) - continue; - } - - /* untyped block, split off a chunk that's a multiple - * of block_size and type it - */ - if (!this->type) { - u32 block = roundup(alloc + splitoff, rmm->block_size); - if (this->length < block) - continue; - - this = region_split(rmm, this, block); - if (!this) - return -ENOMEM; - - this->type = type; - } - - /* stealing memory from adjacent block */ - if (alloc > this->length) { - u32 amount = alloc - (this->length - splitoff); - - if (!next->type) { - amount = roundup(amount, rmm->block_size); - - next = region_split(rmm, next, amount); - if (!next) - return -ENOMEM; - - next->type = type; - } - - this->length += amount; - next->offset += amount; - next->length -= amount; - if (!next->length) { - list_del(&next->nl_entry); - list_del(&next->fl_entry); - kfree(next); - } - } - - if (splitoff) { - if (!region_split(rmm, this, splitoff)) - return -ENOMEM; - } + splitoff = s - this->offset; + if (splitoff && !region_split(rmm, this, splitoff)) + return -ENOMEM; - this = region_split(rmm, this, alloc); - if (this == NULL) + this = region_split(rmm, this, min(size, e - s)); + if (!this) return -ENOMEM; - this->free = false; + this->type = type; list_del(&this->fl_entry); *pnode = this; return 0; @@ -234,7 +135,6 @@ nouveau_mm_init(struct nouveau_mm **prmm, u32 offset, u32 length, u32 block) heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (!heap) return -ENOMEM; - heap->free = true; heap->offset = roundup(offset, block); heap->length = rounddown(offset + length, block) - heap->offset; diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.h b/drivers/gpu/drm/nouveau/nouveau_mm.h index af3844933036..798eaf39691c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mm.h +++ b/drivers/gpu/drm/nouveau/nouveau_mm.h @@ -30,9 +30,7 @@ struct nouveau_mm_node { struct list_head fl_entry; struct list_head rl_entry; - bool free; - int type; - + u8 type; u32 offset; u32 length; }; diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c index 19ef92a0375a..8870d72388c8 100644 --- a/drivers/gpu/drm/nouveau/nv40_graph.c +++ b/drivers/gpu/drm/nouveau/nv40_graph.c @@ -451,8 +451,7 @@ nv40_graph_register(struct drm_device *dev) NVOBJ_CLASS(dev, 0x309e, GR); /* swzsurf */ /* curie */ - if (dev_priv->chipset >= 0x60 || - 0x00005450 & (1 << (dev_priv->chipset & 0x0f))) + if (nv44_graph_class(dev)) NVOBJ_CLASS(dev, 0x4497, GR); else NVOBJ_CLASS(dev, 0x4097, GR); diff --git a/drivers/gpu/drm/nouveau/nv40_grctx.c b/drivers/gpu/drm/nouveau/nv40_grctx.c index ce585093264e..f70447d131d7 100644 --- a/drivers/gpu/drm/nouveau/nv40_grctx.c +++ b/drivers/gpu/drm/nouveau/nv40_grctx.c @@ -118,17 +118,6 @@ */ static int -nv40_graph_4097(struct drm_device *dev) -{ - struct drm_nouveau_private *dev_priv = dev->dev_private; - - if ((dev_priv->chipset & 0xf0) == 0x60) - return 0; - - return !!(0x0baf & (1 << dev_priv->chipset)); -} - -static int nv40_graph_vs_count(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -219,7 +208,7 @@ nv40_graph_construct_general(struct nouveau_grctx *ctx) gr_def(ctx, 0x4009dc, 0x80000000); } else { cp_ctx(ctx, 0x400840, 20); - if (!nv40_graph_4097(ctx->dev)) { + if (nv44_graph_class(ctx->dev)) { for (i = 0; i < 8; i++) gr_def(ctx, 0x400860 + (i * 4), 0x00000001); } @@ -228,7 +217,7 @@ nv40_graph_construct_general(struct nouveau_grctx *ctx) gr_def(ctx, 0x400888, 0x00000040); cp_ctx(ctx, 0x400894, 11); gr_def(ctx, 0x400894, 0x00000040); - if (nv40_graph_4097(ctx->dev)) { + if (!nv44_graph_class(ctx->dev)) { for (i = 0; i < 8; i++) gr_def(ctx, 0x4008a0 + (i * 4), 0x80000000); } @@ -546,7 +535,7 @@ nv40_graph_construct_state3d_2(struct nouveau_grctx *ctx) static void nv40_graph_construct_state3d_3(struct nouveau_grctx *ctx) { - int len = nv40_graph_4097(ctx->dev) ? 0x0684 : 0x0084; + int len = nv44_graph_class(ctx->dev) ? 0x0084 : 0x0684; cp_out (ctx, 0x300000); cp_lsr (ctx, len - 4); @@ -582,11 +571,11 @@ nv40_graph_construct_shader(struct nouveau_grctx *ctx) } else { b0_offset = 0x1d40/4; /* 2200 */ b1_offset = 0x3f40/4; /* 0b00 : 0a40 */ - vs_len = nv40_graph_4097(dev) ? 0x4a40/4 : 0x4980/4; + vs_len = nv44_graph_class(dev) ? 0x4980/4 : 0x4a40/4; } cp_lsr(ctx, vs_len * vs_nr + 0x300/4); - cp_out(ctx, nv40_graph_4097(dev) ? 0x800041 : 0x800029); + cp_out(ctx, nv44_graph_class(dev) ? 0x800029 : 0x800041); offset = ctx->ctxvals_pos; ctx->ctxvals_pos += (0x0300/4 + (vs_nr * vs_len)); diff --git a/drivers/gpu/drm/nouveau/nv40_mc.c b/drivers/gpu/drm/nouveau/nv40_mc.c index e4e72c12ab6a..03c0d4c3f355 100644 --- a/drivers/gpu/drm/nouveau/nv40_mc.c +++ b/drivers/gpu/drm/nouveau/nv40_mc.c @@ -6,27 +6,17 @@ int nv40_mc_init(struct drm_device *dev) { - struct drm_nouveau_private *dev_priv = dev->dev_private; - uint32_t tmp; - /* Power up everything, resetting each individual unit will * be done later if needed. */ nv_wr32(dev, NV03_PMC_ENABLE, 0xFFFFFFFF); - switch (dev_priv->chipset) { - case 0x44: - case 0x46: /* G72 */ - case 0x4e: - case 0x4c: /* C51_G7X */ - tmp = nv_rd32(dev, NV04_PFB_FIFO_DATA); + if (nv44_graph_class(dev)) { + u32 tmp = nv_rd32(dev, NV04_PFB_FIFO_DATA); nv_wr32(dev, NV40_PMC_1700, tmp); nv_wr32(dev, NV40_PMC_1704, 0); nv_wr32(dev, NV40_PMC_1708, 0); nv_wr32(dev, NV40_PMC_170C, tmp); - break; - default: - break; } return 0; diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c index 2e1b1cd19a4b..ea0041810ae3 100644 --- a/drivers/gpu/drm/nouveau/nv50_instmem.c +++ b/drivers/gpu/drm/nouveau/nv50_instmem.c @@ -332,8 +332,11 @@ nv50_instmem_get(struct nouveau_gpuobj *gpuobj, u32 size, u32 align) gpuobj->vinst = node->vram->offset; if (gpuobj->flags & NVOBJ_FLAG_VM) { - ret = nouveau_vm_get(dev_priv->chan_vm, size, 12, - NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS, + u32 flags = NV_MEM_ACCESS_RW; + if (!(gpuobj->flags & NVOBJ_FLAG_VM_USER)) + flags |= NV_MEM_ACCESS_SYS; + + ret = nouveau_vm_get(dev_priv->chan_vm, size, 12, flags, &node->chan_vma); if (ret) { vram->put(dev, &node->vram); diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.c b/drivers/gpu/drm/nouveau/nvc0_graph.c index 5feacd5d5fa4..e6ea7d83187f 100644 --- a/drivers/gpu/drm/nouveau/nvc0_graph.c +++ b/drivers/gpu/drm/nouveau/nvc0_graph.c @@ -105,7 +105,8 @@ nvc0_graph_create_context_mmio_list(struct nouveau_channel *chan) if (ret) return ret; - ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096, NVOBJ_FLAG_VM, + ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096, + NVOBJ_FLAG_VM | NVOBJ_FLAG_VM_USER, &grch->unk418810); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvc0_vm.c b/drivers/gpu/drm/nouveau/nvc0_vm.c index 4b9251bb0ff4..e4e83c2caf5b 100644 --- a/drivers/gpu/drm/nouveau/nvc0_vm.c +++ b/drivers/gpu/drm/nouveau/nvc0_vm.c @@ -48,8 +48,8 @@ nvc0_vm_addr(struct nouveau_vma *vma, u64 phys, u32 memtype, u32 target) phys >>= 8; phys |= 0x00000001; /* present */ -// if (vma->access & NV_MEM_ACCESS_SYS) -// phys |= 0x00000002; + if (vma->access & NV_MEM_ACCESS_SYS) + phys |= 0x00000002; phys |= ((u64)target << 32); phys |= ((u64)memtype << 36); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 7fe8ebdcdc0e..a8973acb3987 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3002,31 +3002,6 @@ int evergreen_copy_blit(struct radeon_device *rdev, return 0; } -static bool evergreen_card_posted(struct radeon_device *rdev) -{ - u32 reg; - - /* first check CRTCs */ - if (rdev->flags & RADEON_IS_IGP) - reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) | - RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET); - else - reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) | - RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET) | - RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET) | - RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET) | - RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) | - RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET); - if (reg & EVERGREEN_CRTC_MASTER_EN) - return true; - - /* then check MEM_SIZE, in case the crtcs are off */ - if (RREG32(CONFIG_MEMSIZE)) - return true; - - return false; -} - /* Plan is to move initialization in that function and use * helper function so that radeon_device_init pretty much * do nothing more than calling asic specific function. This @@ -3063,7 +3038,7 @@ int evergreen_init(struct radeon_device *rdev) if (radeon_asic_reset(rdev)) dev_warn(rdev->dev, "GPU reset failed !\n"); /* Post card if necessary */ - if (!evergreen_card_posted(rdev)) { + if (!radeon_card_posted(rdev)) { if (!rdev->bios) { dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; @@ -3158,6 +3133,9 @@ static void evergreen_pcie_gen2_enable(struct radeon_device *rdev) { u32 link_width_cntl, speed_cntl; + if (radeon_pcie_gen2 == 0) + return; + if (rdev->flags & RADEON_IS_IGP) return; diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index f637595b14e1..46da5142b131 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2086,12 +2086,13 @@ int r100_asic_reset(struct radeon_device *rdev) { struct r100_mc_save save; u32 status, tmp; + int ret = 0; - r100_mc_stop(rdev, &save); status = RREG32(R_000E40_RBBM_STATUS); if (!G_000E40_GUI_ACTIVE(status)) { return 0; } + r100_mc_stop(rdev, &save); status = RREG32(R_000E40_RBBM_STATUS); dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); /* stop CP */ @@ -2131,11 +2132,11 @@ int r100_asic_reset(struct radeon_device *rdev) G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) { dev_err(rdev->dev, "failed to reset GPU\n"); rdev->gpu_lockup = true; - return -1; - } + ret = -1; + } else + dev_info(rdev->dev, "GPU reset succeed\n"); r100_mc_resume(rdev, &save); - dev_info(rdev->dev, "GPU reset succeed\n"); - return 0; + return ret; } void r100_set_common_regs(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index fae5e709f270..cf862ca580bf 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -405,12 +405,13 @@ int r300_asic_reset(struct radeon_device *rdev) { struct r100_mc_save save; u32 status, tmp; + int ret = 0; - r100_mc_stop(rdev, &save); status = RREG32(R_000E40_RBBM_STATUS); if (!G_000E40_GUI_ACTIVE(status)) { return 0; } + r100_mc_stop(rdev, &save); status = RREG32(R_000E40_RBBM_STATUS); dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); /* stop CP */ @@ -451,11 +452,11 @@ int r300_asic_reset(struct radeon_device *rdev) if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) { dev_err(rdev->dev, "failed to reset GPU\n"); rdev->gpu_lockup = true; - return -1; - } + ret = -1; + } else + dev_info(rdev->dev, "GPU reset succeed\n"); r100_mc_resume(rdev, &save); - dev_info(rdev->dev, "GPU reset succeed\n"); - return 0; + return ret; } /* diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 6b50716267c0..aca2236268fa 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2358,24 +2358,6 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg) /* FIXME: implement */ } - -bool r600_card_posted(struct radeon_device *rdev) -{ - uint32_t reg; - - /* first check CRTCs */ - reg = RREG32(D1CRTC_CONTROL) | - RREG32(D2CRTC_CONTROL); - if (reg & CRTC_EN) - return true; - - /* then check MEM_SIZE, in case the crtcs are off */ - if (RREG32(CONFIG_MEMSIZE)) - return true; - - return false; -} - int r600_startup(struct radeon_device *rdev) { int r; @@ -2536,7 +2518,7 @@ int r600_init(struct radeon_device *rdev) if (r) return r; /* Post card if necessary */ - if (!r600_card_posted(rdev)) { + if (!radeon_card_posted(rdev)) { if (!rdev->bios) { dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; @@ -3658,6 +3640,9 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev) u32 link_width_cntl, lanes, speed_cntl, training_cntl, tmp; u16 link_cntl2; + if (radeon_pcie_gen2 == 0) + return; + if (rdev->flags & RADEON_IS_IGP) return; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e9486630a467..71d2a554bbe6 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -92,6 +92,7 @@ extern int radeon_tv; extern int radeon_audio; extern int radeon_disp_priority; extern int radeon_hw_i2c; +extern int radeon_pcie_gen2; /* * Copy from radeon_drv.h so we don't have to include both and have conflicting diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index be5cb4f28c29..d5680a0c87af 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -104,6 +104,7 @@ int radeon_tv = 1; int radeon_audio = 1; int radeon_disp_priority = 0; int radeon_hw_i2c = 0; +int radeon_pcie_gen2 = 0; MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers"); module_param_named(no_wb, radeon_no_wb, int, 0444); @@ -147,6 +148,9 @@ module_param_named(disp_priority, radeon_disp_priority, int, 0444); MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)"); module_param_named(hw_i2c, radeon_hw_i2c, int, 0444); +MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (1 = enable)"); +module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444); + static int radeon_suspend(struct drm_device *dev, pm_message_t state) { drm_radeon_private_t *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/radeon/reg_srcs/evergreen b/drivers/gpu/drm/radeon/reg_srcs/evergreen index ac40fd39d787..9177f9191837 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/evergreen +++ b/drivers/gpu/drm/radeon/reg_srcs/evergreen @@ -439,7 +439,7 @@ evergreen 0x9400 0x000286EC SPI_COMPUTE_NUM_THREAD_X 0x000286F0 SPI_COMPUTE_NUM_THREAD_Y 0x000286F4 SPI_COMPUTE_NUM_THREAD_Z -0x000286F8 GDS_ADDR_SIZE +0x00028724 GDS_ADDR_SIZE 0x00028780 CB_BLEND0_CONTROL 0x00028784 CB_BLEND1_CONTROL 0x00028788 CB_BLEND2_CONTROL diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index b4192acaab5f..5afe294ed51f 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -339,16 +339,16 @@ void rs600_bm_disable(struct radeon_device *rdev) int rs600_asic_reset(struct radeon_device *rdev) { - u32 status, tmp; - struct rv515_mc_save save; + u32 status, tmp; + int ret = 0; - /* Stops all mc clients */ - rv515_mc_stop(rdev, &save); status = RREG32(R_000E40_RBBM_STATUS); if (!G_000E40_GUI_ACTIVE(status)) { return 0; } + /* Stops all mc clients */ + rv515_mc_stop(rdev, &save); status = RREG32(R_000E40_RBBM_STATUS); dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); /* stop CP */ @@ -392,11 +392,11 @@ int rs600_asic_reset(struct radeon_device *rdev) if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) { dev_err(rdev->dev, "failed to reset GPU\n"); rdev->gpu_lockup = true; - return -1; - } + ret = -1; + } else + dev_info(rdev->dev, "GPU reset succeed\n"); rv515_mc_resume(rdev, &save); - dev_info(rdev->dev, "GPU reset succeed\n"); - return 0; + return ret; } /* diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 3a264aa3a79a..491dc9000655 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1268,7 +1268,7 @@ int rv770_init(struct radeon_device *rdev) if (r) return r; /* Post card if necessary */ - if (!r600_card_posted(rdev)) { + if (!radeon_card_posted(rdev)) { if (!rdev->bios) { dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; @@ -1372,6 +1372,9 @@ static void rv770_pcie_gen2_enable(struct radeon_device *rdev) u32 link_width_cntl, lanes, speed_cntl, tmp; u16 link_cntl2; + if (radeon_pcie_gen2 == 0) + return; + if (rdev->flags & RADEON_IS_IGP) return; diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 68883565b725..f9ba7d74dfc0 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -308,7 +308,7 @@ static void ib_cache_event(struct ib_event_handler *handler, INIT_WORK(&work->work, ib_cache_task); work->device = event->device; work->port_num = event->element.port_num; - schedule_work(&work->work); + queue_work(ib_wq, &work->work); } } } @@ -368,7 +368,7 @@ static void ib_cache_cleanup_one(struct ib_device *device) int p; ib_unregister_event_handler(&device->cache.event_handler); - flush_scheduled_work(); + flush_workqueue(ib_wq); for (p = 0; p <= end_port(device) - start_port(device); ++p) { kfree(device->cache.pkey_cache[p]); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a19effad0811..f793bf2f5da7 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -38,7 +38,6 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/mutex.h> -#include <linux/workqueue.h> #include "core_priv.h" @@ -52,6 +51,9 @@ struct ib_client_data { void * data; }; +struct workqueue_struct *ib_wq; +EXPORT_SYMBOL_GPL(ib_wq); + static LIST_HEAD(device_list); static LIST_HEAD(client_list); @@ -718,6 +720,10 @@ static int __init ib_core_init(void) { int ret; + ib_wq = alloc_workqueue("infiniband", 0, 0); + if (!ib_wq) + return -ENOMEM; + ret = ib_sysfs_setup(); if (ret) printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); @@ -726,6 +732,7 @@ static int __init ib_core_init(void) if (ret) { printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); ib_sysfs_cleanup(); + destroy_workqueue(ib_wq); } return ret; @@ -736,7 +743,7 @@ static void __exit ib_core_cleanup(void) ib_cache_cleanup(); ib_sysfs_cleanup(); /* Make sure that any pending umem accounting work is done. */ - flush_scheduled_work(); + destroy_workqueue(ib_wq); } module_init(ib_core_init); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 91a660310b7c..e38be1bcc01c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -425,7 +425,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); - schedule_work(&sa_dev->port[event->element.port_num - + queue_work(ib_wq, &sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); } } diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 415e186eee32..b645e558876f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -262,7 +262,7 @@ void ib_umem_release(struct ib_umem *umem) umem->mm = mm; umem->diff = diff; - schedule_work(&umem->work); + queue_work(ib_wq, &umem->work); return; } } else diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index 85cfae4cad71..8c81992fa6db 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -459,13 +459,12 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev) IB_DEVICE_MEM_WINDOW); /* Allocate the qptr_array */ - c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *)); + c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *)); if (!c2dev->qptr_array) { return -ENOMEM; } - /* Inialize the qptr_array */ - memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *)); + /* Initialize the qptr_array */ c2dev->qptr_array[0] = (void *) &c2dev->req_vq; c2dev->qptr_array[1] = (void *) &c2dev->rep_vq; c2dev->qptr_array[2] = (void *) &c2dev->aeq; diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index 1596e3085344..1898d6e7cce5 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -222,15 +222,14 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, queue->small_page = NULL; /* allocate queue page pointers */ - queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL); + queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), GFP_KERNEL); if (!queue->queue_pages) { - queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); if (!queue->queue_pages) { ehca_gen_err("Couldn't allocate queue page list"); return 0; } } - memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); /* allocate actual queue pages */ if (is_small) { diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index b33f0457a1ff..47db4bf34628 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -199,12 +199,11 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) goto bail; } - dd = vmalloc(sizeof(*dd)); + dd = vzalloc(sizeof(*dd)); if (!dd) { dd = ERR_PTR(-ENOMEM); goto bail; } - memset(dd, 0, sizeof(*dd)); dd->ipath_unit = -1; spin_lock_irqsave(&ipath_devs_lock, flags); @@ -756,7 +755,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) */ ipath_shutdown_device(dd); - flush_scheduled_work(); + flush_workqueue(ib_wq); if (dd->verbs_dev) ipath_unregister_ib_device(dd->verbs_dev); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 9292a15ad7c4..6d4b29c4cd89 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1530,7 +1530,7 @@ static int init_subports(struct ipath_devdata *dd, } num_subports = uinfo->spu_subport_cnt; - pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports); + pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports); if (!pd->subport_uregbase) { ret = -ENOMEM; goto bail; @@ -1538,13 +1538,13 @@ static int init_subports(struct ipath_devdata *dd, /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * sizeof(u32), PAGE_SIZE) * num_subports; - pd->subport_rcvhdr_base = vmalloc(size); + pd->subport_rcvhdr_base = vzalloc(size); if (!pd->subport_rcvhdr_base) { ret = -ENOMEM; goto bail_ureg; } - pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * + pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * num_subports); if (!pd->subport_rcvegrbuf) { @@ -1556,11 +1556,6 @@ static int init_subports(struct ipath_devdata *dd, pd->port_subport_id = uinfo->spu_subport_id; pd->active_slaves = 1; set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); - memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports); - memset(pd->subport_rcvhdr_base, 0, size); - memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks * - pd->port_rcvegrbuf_size * - num_subports); goto bail; bail_rhdr: diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 776938299e4c..fef0f4201257 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -442,7 +442,7 @@ static void init_shadow_tids(struct ipath_devdata *dd) struct page **pages; dma_addr_t *addrs; - pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * sizeof(struct page *)); if (!pages) { ipath_dev_err(dd, "failed to allocate shadow page * " @@ -461,9 +461,6 @@ static void init_shadow_tids(struct ipath_devdata *dd) return; } - memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt * - sizeof(struct page *)); - dd->ipath_pageshadow = pages; dd->ipath_physshadow = addrs; } diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 5e86d73eba2a..bab9f74c0665 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -220,7 +220,7 @@ void ipath_release_user_pages_on_close(struct page **p, size_t num_pages) work->mm = mm; work->num_pages = num_pages; - schedule_work(&work->work); + queue_work(ib_wq, &work->work); return; bail_mm: diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4c85224aeaa7..c7a6213c6996 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -623,8 +623,9 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); - err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags & - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, + !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + MLX4_PROTOCOL_IB); if (err) return err; @@ -635,7 +636,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return 0; err_add: - mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw); + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROTOCOL_IB); return err; } @@ -665,7 +666,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct mlx4_ib_gid_entry *ge; err = mlx4_multicast_detach(mdev->dev, - &mqp->mqp, gid->raw); + &mqp->mqp, gid->raw, MLX4_PROTOCOL_IB); if (err) return err; @@ -1005,7 +1006,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_uar_alloc(dev, &ibdev->priv_uar)) goto err_pd; - ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT, + PAGE_SIZE); if (!ibdev->uar_map) goto err_uar; MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 0aa0110e4b6c..e4a08c2819e4 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -146,7 +146,7 @@ static void poll_catas(unsigned long dev_ptr) void mthca_start_catas_poll(struct mthca_dev *dev) { - unsigned long addr; + phys_addr_t addr; init_timer(&dev->catas_err.timer); dev->catas_err.map = NULL; @@ -158,7 +158,8 @@ void mthca_start_catas_poll(struct mthca_dev *dev) dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); if (!dev->catas_err.map) { mthca_warn(dev, "couldn't map catastrophic error region " - "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); + "at 0x%llx/0x%x\n", (unsigned long long) addr, + dev->catas_err.size * 4); return; } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index f4ceecd9684b..7bfa2a164955 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -713,7 +713,7 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status) static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base) { - unsigned long addr; + phys_addr_t addr; u16 max_off = 0; int i; diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 8e8c728aff88..76785c653c13 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -653,7 +653,7 @@ static int mthca_map_reg(struct mthca_dev *dev, unsigned long offset, unsigned long size, void __iomem **map) { - unsigned long base = pci_resource_start(dev->pdev, 0); + phys_addr_t base = pci_resource_start(dev->pdev, 0); *map = ioremap(base + offset, size); if (!*map) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 5eee6665919a..8a40cd539ab1 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -790,7 +790,7 @@ static int mthca_setup_hca(struct mthca_dev *dev) goto err_uar_table_free; } - dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + dev->kar = ioremap((phys_addr_t) dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!dev->kar) { mthca_err(dev, "Couldn't map kernel access region, " "aborting.\n"); diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 065b20899876..44045c8846db 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -853,7 +853,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) int mthca_init_mr_table(struct mthca_dev *dev) { - unsigned long addr; + phys_addr_t addr; int mpts, mtts, err, i; err = mthca_alloc_init(&dev->mr_table.mpt_alloc, diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 0c9f0aa5d4ea..3b4ec3238ceb 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -144,6 +144,7 @@ static int nes_inetaddr_event(struct notifier_block *notifier, struct nes_device *nesdev; struct net_device *netdev; struct nes_vnic *nesvnic; + unsigned int is_bonded; nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %pI4, netmask %pI4.\n", &ifa->ifa_address, &ifa->ifa_mask); @@ -152,7 +153,8 @@ static int nes_inetaddr_event(struct notifier_block *notifier, nesdev, nesdev->netdev[0]->name); netdev = nesdev->netdev[0]; nesvnic = netdev_priv(netdev); - if (netdev == event_netdev) { + is_bonded = (netdev->master == event_netdev); + if ((netdev == event_netdev) || is_bonded) { if (nesvnic->rdma_enabled == 0) { nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since" " RDMA is not enabled.\n", @@ -169,7 +171,10 @@ static int nes_inetaddr_event(struct notifier_block *notifier, nes_manage_arp_cache(netdev, netdev->dev_addr, ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE); nesvnic->local_ipaddr = 0; - return NOTIFY_OK; + if (is_bonded) + continue; + else + return NOTIFY_OK; break; case NETDEV_UP: nes_debug(NES_DBG_NETDEV, "event:UP\n"); @@ -178,15 +183,24 @@ static int nes_inetaddr_event(struct notifier_block *notifier, nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n"); return NOTIFY_OK; } + /* fall through */ + case NETDEV_CHANGEADDR: /* Add the address to the IP table */ - nesvnic->local_ipaddr = ifa->ifa_address; + if (netdev->master) + nesvnic->local_ipaddr = + ((struct in_device *)netdev->master->ip_ptr)->ifa_list->ifa_address; + else + nesvnic->local_ipaddr = ifa->ifa_address; nes_write_indexed(nesdev, NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), - ntohl(ifa->ifa_address)); + ntohl(nesvnic->local_ipaddr)); nes_manage_arp_cache(netdev, netdev->dev_addr, ntohl(nesvnic->local_ipaddr), NES_ARP_ADD); - return NOTIFY_OK; + if (is_bonded) + continue; + else + return NOTIFY_OK; break; default: break; @@ -660,6 +674,8 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i } nes_notifiers_registered++; + INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status); + /* Initialize network devices */ if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) goto bail7; @@ -742,6 +758,7 @@ static void __devexit nes_remove(struct pci_dev *pcidev) struct nes_device *nesdev = pci_get_drvdata(pcidev); struct net_device *netdev; int netdev_index = 0; + unsigned long flags; if (nesdev->netdev_count) { netdev = nesdev->netdev[netdev_index]; @@ -768,6 +785,14 @@ static void __devexit nes_remove(struct pci_dev *pcidev) free_irq(pcidev->irq, nesdev); tasklet_kill(&nesdev->dpc_tasklet); + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + if (nesdev->link_recheck) { + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + cancel_delayed_work_sync(&nesdev->work); + } else { + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + } + /* Deallocate the Adapter Structure */ nes_destroy_adapter(nesdev->nesadapter); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index b3d145e82b4c..6fe79876009e 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -268,6 +268,9 @@ struct nes_device { u8 napi_isr_ran; u8 disable_rx_flow_control; u8 disable_tx_flow_control; + + struct delayed_work work; + u8 link_recheck; }; @@ -507,6 +510,7 @@ void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *); void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *); int nes_destroy_cqp(struct nes_device *); int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); +void nes_recheck_link_status(struct work_struct *work); /* nes_nic.c */ struct net_device *nes_netdev_init(struct nes_device *, void __iomem *); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 25ad0f9944c0..009ec814d517 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1107,6 +1107,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi struct flowi fl; struct neighbour *neigh; int rc = arpindex; + struct net_device *netdev; struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; memset(&fl, 0, sizeof fl); @@ -1117,7 +1118,12 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi return rc; } - neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, nesvnic->netdev); + if (nesvnic->netdev->master) + netdev = nesvnic->netdev->master; + else + netdev = nesvnic->netdev; + + neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev); if (neigh) { if (neigh->nud_state & NUD_VALID) { nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 1980a461c499..8b606fd64022 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2608,6 +2608,13 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) netif_start_queue(nesvnic->netdev); nesvnic->linkup = 1; netif_carrier_on(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesdev->iw_status == 0) { + nesdev->iw_status = 1; + nes_port_ibevent(nesvnic); + } + spin_unlock(&nesvnic->port_ibevent_lock); } } } else { @@ -2633,9 +2640,23 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) netif_stop_queue(nesvnic->netdev); nesvnic->linkup = 0; netif_carrier_off(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesdev->iw_status == 1) { + nesdev->iw_status = 0; + nes_port_ibevent(nesvnic); + } + spin_unlock(&nesvnic->port_ibevent_lock); } } } + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) { + if (nesdev->link_recheck) + cancel_delayed_work(&nesdev->work); + nesdev->link_recheck = 1; + schedule_delayed_work(&nesdev->work, + NES_LINK_RECHECK_DELAY); + } } spin_unlock_irqrestore(&nesadapter->phy_lock, flags); @@ -2643,6 +2664,80 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE; } +void nes_recheck_link_status(struct work_struct *work) +{ + unsigned long flags; + struct nes_device *nesdev = container_of(work, struct nes_device, work.work); + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_vnic *nesvnic; + u32 mac_index = nesdev->mac_index; + u16 phy_data; + u16 temp_phy_data; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + + /* check link status */ + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; + + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __func__, phy_data, + nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP"); + + if (phy_data & 0x0004) { + nesadapter->mac_link_down[mac_index] = 0; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + if (nesvnic->linkup == 0) { + printk(PFX "The Link is now up for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); + if (netif_queue_stopped(nesvnic->netdev)) + netif_start_queue(nesvnic->netdev); + nesvnic->linkup = 1; + netif_carrier_on(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesdev->iw_status == 0) { + nesdev->iw_status = 1; + nes_port_ibevent(nesvnic); + } + spin_unlock(&nesvnic->port_ibevent_lock); + } + } + + } else { + nesadapter->mac_link_down[mac_index] = 1; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + if (nesvnic->linkup == 1) { + printk(PFX "The Link is now down for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); + if (!(netif_queue_stopped(nesvnic->netdev))) + netif_stop_queue(nesvnic->netdev); + nesvnic->linkup = 0; + netif_carrier_off(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesdev->iw_status == 1) { + nesdev->iw_status = 0; + nes_port_ibevent(nesvnic); + } + spin_unlock(&nesvnic->port_ibevent_lock); + } + } + } + if (nesdev->link_recheck++ < NES_LINK_RECHECK_MAX) + schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY); + else + nesdev->link_recheck = 0; + + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); +} static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 1204c3432b63..d2abe07133a5 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1193,6 +1193,8 @@ struct nes_listener { struct nes_ib_device; +#define NES_EVENT_DELAY msecs_to_jiffies(100) + struct nes_vnic { struct nes_ib_device *nesibdev; u64 sq_full; @@ -1247,6 +1249,10 @@ struct nes_vnic { u32 lro_max_aggr; struct net_lro_mgr lro_mgr; struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS]; + struct timer_list event_timer; + enum ib_event_type delayed_event; + enum ib_event_type last_dispatched_event; + spinlock_t port_ibevent_lock; }; struct nes_ib_device { @@ -1348,6 +1354,10 @@ struct nes_terminate_hdr { #define BAD_FRAME_OFFSET 64 #define CQE_MAJOR_DRV 0x8000 +/* Used for link status recheck after interrupt processing */ +#define NES_LINK_RECHECK_DELAY msecs_to_jiffies(50) +#define NES_LINK_RECHECK_MAX 60 + #define nes_vlan_rx vlan_hwaccel_receive_skb #define nes_netif_rx netif_receive_skb diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 5a4c36484722..2c9c1933bbe3 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -144,6 +144,7 @@ static int nes_netdev_open(struct net_device *netdev) u32 nic_active_bit; u32 nic_active; struct list_head *list_pos, *list_temp; + unsigned long flags; assert(nesdev != NULL); @@ -233,18 +234,36 @@ static int nes_netdev_open(struct net_device *netdev) first_nesvnic = nesvnic; } - if (nesvnic->of_device_registered) { - nesdev->iw_status = 1; - nesdev->nesadapter->send_term_ok = 1; - nes_port_ibevent(nesvnic); - } - if (first_nesvnic->linkup) { /* Enable network packets */ nesvnic->linkup = 1; netif_start_queue(netdev); netif_carrier_on(netdev); } + + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) { + if (nesdev->link_recheck) + cancel_delayed_work(&nesdev->work); + nesdev->link_recheck = 1; + schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY); + } + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + + spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags); + if (nesvnic->of_device_registered) { + nesdev->nesadapter->send_term_ok = 1; + if (nesvnic->linkup == 1) { + if (nesdev->iw_status == 0) { + nesdev->iw_status = 1; + nes_port_ibevent(nesvnic); + } + } else { + nesdev->iw_status = 0; + } + } + spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags); + napi_enable(&nesvnic->napi); nesvnic->netdev_open = 1; @@ -263,6 +282,7 @@ static int nes_netdev_stop(struct net_device *netdev) u32 nic_active; struct nes_vnic *first_nesvnic = NULL; struct list_head *list_pos, *list_temp; + unsigned long flags; nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n", nesvnic, nesdev, netdev, netdev->name); @@ -315,12 +335,17 @@ static int nes_netdev_stop(struct net_device *netdev) nic_active &= nic_active_mask; nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); - + spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags); if (nesvnic->of_device_registered) { nesdev->nesadapter->send_term_ok = 0; nesdev->iw_status = 0; - nes_port_ibevent(nesvnic); + if (nesvnic->linkup == 1) + nes_port_ibevent(nesvnic); } + del_timer_sync(&nesvnic->event_timer); + nesvnic->event_timer.function = NULL; + spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags); + nes_destroy_nic_qp(nesvnic); nesvnic->netdev_open = 0; @@ -1750,7 +1775,10 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic->rdma_enabled = 0; } nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id; + init_timer(&nesvnic->event_timer); + nesvnic->event_timer.function = NULL; spin_lock_init(&nesvnic->tx_lock); + spin_lock_init(&nesvnic->port_ibevent_lock); nesdev->netdev[nesdev->netdev_count] = netdev; nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n", @@ -1763,8 +1791,11 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { u32 u32temp; - u32 link_mask; - u32 link_val; + u32 link_mask = 0; + u32 link_val = 0; + u16 temp_phy_data; + u16 phy_data = 0; + unsigned long flags; u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesdev->mac_index & 1))); @@ -1786,6 +1817,23 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, link_val = 0x02020000; } break; + case NES_PHY_TYPE_SFP_D: + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + nes_read_10G_phy_reg(nesdev, + nesdev->nesadapter->phy_index[nesdev->mac_index], + 1, 0x9003); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, + nesdev->nesadapter->phy_index[nesdev->mac_index], + 3, 0x0021); + nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, + nesdev->nesadapter->phy_index[nesdev->mac_index], + 3, 0x0021); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; + break; default: link_mask = 0x0f1f0000; link_val = 0x0f0f0000; @@ -1795,8 +1843,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesdev->mac_index & 1))); - if ((u32temp & link_mask) == link_val) - nesvnic->linkup = 1; + + if (phy_type == NES_PHY_TYPE_SFP_D) { + if (phy_data & 0x0004) + nesvnic->linkup = 1; + } else { + if ((u32temp & link_mask) == link_val) + nesvnic->linkup = 1; + } /* clear the MAC interrupt status, assumes direct logical to physical mapping */ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 99933e4e48ff..26d8018c0a7c 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3936,6 +3936,30 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) return nesibdev; } + +/** + * nes_handle_delayed_event + */ +static void nes_handle_delayed_event(unsigned long data) +{ + struct nes_vnic *nesvnic = (void *) data; + + if (nesvnic->delayed_event != nesvnic->last_dispatched_event) { + struct ib_event event; + + event.device = &nesvnic->nesibdev->ibdev; + if (!event.device) + goto stop_timer; + event.event = nesvnic->delayed_event; + event.element.port_num = nesvnic->logical_port + 1; + ib_dispatch_event(&event); + } + +stop_timer: + nesvnic->event_timer.function = NULL; +} + + void nes_port_ibevent(struct nes_vnic *nesvnic) { struct nes_ib_device *nesibdev = nesvnic->nesibdev; @@ -3944,7 +3968,18 @@ void nes_port_ibevent(struct nes_vnic *nesvnic) event.device = &nesibdev->ibdev; event.element.port_num = nesvnic->logical_port + 1; event.event = nesdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; - ib_dispatch_event(&event); + + if (!nesvnic->event_timer.function) { + ib_dispatch_event(&event); + nesvnic->last_dispatched_event = event.event; + nesvnic->event_timer.function = nes_handle_delayed_event; + nesvnic->event_timer.data = (unsigned long) nesvnic; + nesvnic->event_timer.expires = jiffies + NES_EVENT_DELAY; + add_timer(&nesvnic->event_timer); + } else { + mod_timer(&nesvnic->event_timer, jiffies + NES_EVENT_DELAY); + } + nesvnic->delayed_event = event.event; } diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 127a0d5069f0..de799f17cb9e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1692,8 +1692,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; spin_unlock_irqrestore(&ppd->lflags_lock, flags); wake_up(&ppd->cpspec->autoneg_wait); - cancel_delayed_work(&ppd->cpspec->autoneg_work); - flush_scheduled_work(); + cancel_delayed_work_sync(&ppd->cpspec->autoneg_work); shutdown_7220_relock_poll(ppd->dd); val = qib_read_kreg64(ppd->dd, kr_xgxs_cfg); @@ -3515,8 +3514,8 @@ static void try_7220_autoneg(struct qib_pportdata *ppd) toggle_7220_rclkrls(ppd->dd); /* 2 msec is minimum length of a poll cycle */ - schedule_delayed_work(&ppd->cpspec->autoneg_work, - msecs_to_jiffies(2)); + queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work, + msecs_to_jiffies(2)); } /* diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index abd409d592ef..50cceb3ab885 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2406,10 +2406,9 @@ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; spin_unlock_irqrestore(&ppd->lflags_lock, flags); wake_up(&ppd->cpspec->autoneg_wait); - cancel_delayed_work(&ppd->cpspec->autoneg_work); + cancel_delayed_work_sync(&ppd->cpspec->autoneg_work); if (ppd->dd->cspec->r1) - cancel_delayed_work(&ppd->cpspec->ipg_work); - flush_scheduled_work(); + cancel_delayed_work_sync(&ppd->cpspec->ipg_work); ppd->cpspec->chase_end = 0; if (ppd->cpspec->chase_timer.data) /* if initted */ @@ -2706,7 +2705,7 @@ static noinline void unknown_7322_gpio_intr(struct qib_devdata *dd) if (!(pins & mask)) { ++handled; qd->t_insert = get_jiffies_64(); - schedule_work(&qd->work); + queue_work(ib_wq, &qd->work); } } } @@ -4990,8 +4989,8 @@ static void try_7322_autoneg(struct qib_pportdata *ppd) set_7322_ibspeed_fast(ppd, QIB_IB_DDR); qib_7322_mini_pcs_reset(ppd); /* 2 msec is minimum length of a poll cycle */ - schedule_delayed_work(&ppd->cpspec->autoneg_work, - msecs_to_jiffies(2)); + queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work, + msecs_to_jiffies(2)); } /* @@ -5121,7 +5120,8 @@ static void try_7322_ipg(struct qib_pportdata *ppd) ib_free_send_mad(send_buf); retry: delay = 2 << ppd->cpspec->ipg_tries; - schedule_delayed_work(&ppd->cpspec->ipg_work, msecs_to_jiffies(delay)); + queue_delayed_work(ib_wq, &ppd->cpspec->ipg_work, + msecs_to_jiffies(delay)); } /* diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 7896afbb9ce8..ffefb78b8949 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -80,7 +80,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); -struct workqueue_struct *qib_wq; struct workqueue_struct *qib_cq_wq; static void verify_interrupt(unsigned long); @@ -270,23 +269,20 @@ static void init_shadow_tids(struct qib_devdata *dd) struct page **pages; dma_addr_t *addrs; - pages = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); + pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); if (!pages) { qib_dev_err(dd, "failed to allocate shadow page * " "array, no expected sends!\n"); goto bail; } - addrs = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); + addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); if (!addrs) { qib_dev_err(dd, "failed to allocate shadow dma handle " "array, no expected sends!\n"); goto bail_free; } - memset(pages, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); - memset(addrs, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); - dd->pageshadow = pages; dd->physshadow = addrs; return; @@ -1047,24 +1043,10 @@ static int __init qlogic_ib_init(void) if (ret) goto bail; - /* - * We create our own workqueue mainly because we want to be - * able to flush it when devices are being removed. We can't - * use schedule_work()/flush_scheduled_work() because both - * unregister_netdev() and linkwatch_event take the rtnl lock, - * so flush_scheduled_work() can deadlock during device - * removal. - */ - qib_wq = create_workqueue("qib"); - if (!qib_wq) { - ret = -ENOMEM; - goto bail_dev; - } - qib_cq_wq = create_singlethread_workqueue("qib_cq"); if (!qib_cq_wq) { ret = -ENOMEM; - goto bail_wq; + goto bail_dev; } /* @@ -1094,8 +1076,6 @@ bail_unit: idr_destroy(&qib_unit_table); bail_cq_wq: destroy_workqueue(qib_cq_wq); -bail_wq: - destroy_workqueue(qib_wq); bail_dev: qib_dev_cleanup(); bail: @@ -1119,7 +1099,6 @@ static void __exit qlogic_ib_cleanup(void) pci_unregister_driver(&qib_driver); - destroy_workqueue(qib_wq); destroy_workqueue(qib_cq_wq); qib_cpulist_count = 0; @@ -1292,7 +1271,7 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (qib_mini_init || initfail || ret) { qib_stop_timers(dd); - flush_scheduled_work(); + flush_workqueue(ib_wq); for (pidx = 0; pidx < dd->num_pports; ++pidx) dd->f_quiet_serdes(dd->pport + pidx); if (qib_mini_init) @@ -1341,8 +1320,8 @@ static void __devexit qib_remove_one(struct pci_dev *pdev) qib_stop_timers(dd); - /* wait until all of our (qsfp) schedule_work() calls complete */ - flush_scheduled_work(); + /* wait until all of our (qsfp) queue_work() calls complete */ + flush_workqueue(ib_wq); ret = qibfs_remove(dd); if (ret) diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c index 35b3604b691d..3374a52232c1 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.c +++ b/drivers/infiniband/hw/qib/qib_qsfp.c @@ -485,7 +485,7 @@ void qib_qsfp_init(struct qib_qsfp_data *qd, goto bail; /* We see a module, but it may be unwise to look yet. Just schedule */ qd->t_insert = get_jiffies_64(); - schedule_work(&qd->work); + queue_work(ib_wq, &qd->work); bail: return; } @@ -493,10 +493,9 @@ bail: void qib_qsfp_deinit(struct qib_qsfp_data *qd) { /* - * There is nothing to do here for now. our - * work is scheduled with schedule_work(), and - * flush_scheduled_work() from remove_one will - * block until all work ssetup with schedule_work() + * There is nothing to do here for now. our work is scheduled + * with queue_work(), and flush_workqueue() from remove_one + * will block until all work setup with queue_work() * completes. */ } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 63b22a9a7feb..95e5b47223b3 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -805,7 +805,6 @@ static inline int qib_send_ok(struct qib_qp *qp) !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); } -extern struct workqueue_struct *qib_wq; extern struct workqueue_struct *qib_cq_wq; /* @@ -814,7 +813,7 @@ extern struct workqueue_struct *qib_cq_wq; static inline void qib_schedule_send(struct qib_qp *qp) { if (qib_send_ok(qp)) - queue_work(qib_wq, &qp->s_work); + queue_work(ib_wq, &qp->s_work); } static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index c1c49f2d35b5..93d55806b967 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -352,15 +352,13 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i int ret; int i; - rx->rx_ring = vmalloc(ipoib_recvq_size * sizeof *rx->rx_ring); + rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring); if (!rx->rx_ring) { printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n", priv->ca->name, ipoib_recvq_size); return -ENOMEM; } - memset(rx->rx_ring, 0, ipoib_recvq_size * sizeof *rx->rx_ring); - t = kmalloc(sizeof *t, GFP_KERNEL); if (!t) { ret = -ENOMEM; @@ -1097,13 +1095,12 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct ipoib_dev_priv *priv = netdev_priv(p->dev); int ret; - p->tx_ring = vmalloc(ipoib_sendq_size * sizeof *p->tx_ring); + p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring); if (!p->tx_ring) { ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } - memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); p->qp = ipoib_cm_create_tx_qp(p->dev, p); if (IS_ERR(p->qp)) { @@ -1521,7 +1518,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) return; } - priv->cm.srq_ring = vmalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring); + priv->cm.srq_ring = vzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring); if (!priv->cm.srq_ring) { printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", priv->ca->name, ipoib_recvq_size); @@ -1530,7 +1527,6 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) return; } - memset(priv->cm.srq_ring, 0, ipoib_recvq_size * sizeof *priv->cm.srq_ring); } int ipoib_cm_dev_init(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7a07a728fe0d..aca3b44f7aed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -916,13 +916,12 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out; } - priv->tx_ring = vmalloc(ipoib_sendq_size * sizeof *priv->tx_ring); + priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } - memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring); /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 4b62105ed1e8..83664ed2804f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -638,7 +638,7 @@ err: if (target->state == SRP_TARGET_CONNECTING) { target->state = SRP_TARGET_DEAD; INIT_WORK(&target->work, srp_remove_work); - schedule_work(&target->work); + queue_work(ib_wq, &target->work); } spin_unlock_irq(&target->lock); @@ -1132,15 +1132,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) spin_lock_irqsave(&target->lock, flags); iu = __srp_get_tx_iu(target, SRP_IU_CMD); - if (iu) { - req = list_first_entry(&target->free_reqs, struct srp_request, - list); - list_del(&req->list); - } - spin_unlock_irqrestore(&target->lock, flags); - if (!iu) - goto err; + goto err_unlock; + + req = list_first_entry(&target->free_reqs, struct srp_request, list); + list_del(&req->list); + spin_unlock_irqrestore(&target->lock, flags); dev = target->srp_host->srp_dev->dev; ib_dma_sync_single_for_cpu(dev, iu->dma, srp_max_iu_len, @@ -1185,6 +1182,8 @@ err_iu: spin_lock_irqsave(&target->lock, flags); list_add(&req->list, &target->free_reqs); + +err_unlock: spin_unlock_irqrestore(&target->lock, flags); err: @@ -2199,7 +2198,7 @@ static void srp_remove_one(struct ib_device *device) * started before we marked our target ports as * removed, and any target port removal tasks. */ - flush_scheduled_work(); + flush_workqueue(ib_wq); list_for_each_entry_safe(target, tmp_target, &host->target_list, list) { diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index b1f768917395..77414702cb00 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -53,9 +53,10 @@ config MTD_PARTITIONS devices. Partitioning on NFTL 'devices' is a different - that's the 'normal' form of partitioning used on a block device. +if MTD_PARTITIONS + config MTD_REDBOOT_PARTS tristate "RedBoot partition table parsing" - depends on MTD_PARTITIONS ---help--- RedBoot is a ROM monitor and bootloader which deals with multiple 'images' in flash devices by putting a table one of the erase @@ -72,9 +73,10 @@ config MTD_REDBOOT_PARTS SA1100 map driver (CONFIG_MTD_SA1100) has an option for this, for example. +if MTD_REDBOOT_PARTS + config MTD_REDBOOT_DIRECTORY_BLOCK int "Location of RedBoot partition table" - depends on MTD_REDBOOT_PARTS default "-1" ---help--- This option is the Linux counterpart to the @@ -91,18 +93,18 @@ config MTD_REDBOOT_DIRECTORY_BLOCK config MTD_REDBOOT_PARTS_UNALLOCATED bool "Include unallocated flash regions" - depends on MTD_REDBOOT_PARTS help If you need to register each unallocated flash region as a MTD 'partition', enable this option. config MTD_REDBOOT_PARTS_READONLY bool "Force read-only for RedBoot system images" - depends on MTD_REDBOOT_PARTS help If you need to force read-only for 'RedBoot', 'RedBoot Config' and 'FIS directory' images, enable this option. +endif # MTD_REDBOOT_PARTS + config MTD_CMDLINE_PARTS bool "Command line partition table parsing" depends on MTD_PARTITIONS = "y" && MTD = "y" @@ -142,7 +144,7 @@ config MTD_CMDLINE_PARTS config MTD_AFS_PARTS tristate "ARM Firmware Suite partition parsing" - depends on ARM && MTD_PARTITIONS + depends on ARM ---help--- The ARM Firmware Suite allows the user to divide flash devices into multiple 'images'. Each such image has a header containing its name @@ -158,8 +160,8 @@ config MTD_AFS_PARTS example. config MTD_OF_PARTS - tristate "Flash partition map based on OF description" - depends on OF && MTD_PARTITIONS + def_bool y + depends on OF help This provides a partition parsing function which derives the partition map from the children of the flash node, @@ -167,10 +169,11 @@ config MTD_OF_PARTS config MTD_AR7_PARTS tristate "TI AR7 partitioning support" - depends on MTD_PARTITIONS ---help--- TI AR7 partitioning support +endif # MTD_PARTITIONS + comment "User Modules And Translation Layers" config MTD_CHAR diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile index 760abc533395..d4e7f25b1ebb 100644 --- a/drivers/mtd/Makefile +++ b/drivers/mtd/Makefile @@ -6,13 +6,13 @@ obj-$(CONFIG_MTD) += mtd.o mtd-y := mtdcore.o mtdsuper.o mtd-$(CONFIG_MTD_PARTITIONS) += mtdpart.o +mtd-$(CONFIG_MTD_OF_PARTS) += ofpart.o obj-$(CONFIG_MTD_CONCAT) += mtdconcat.o obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o obj-$(CONFIG_MTD_AFS_PARTS) += afs.o obj-$(CONFIG_MTD_AR7_PARTS) += ar7part.o -obj-$(CONFIG_MTD_OF_PARTS) += ofpart.o # 'Users' - code which presents functionality to userspace. obj-$(CONFIG_MTD_CHAR) += mtdchar.o diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index ad9268b44416..a8c3e1c9b02a 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -162,7 +162,7 @@ static void cfi_tell_features(struct cfi_pri_intelext *extp) #endif /* Atmel chips don't use the same PRI format as Intel chips */ -static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param) +static void fixup_convert_atmel_pri(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -202,7 +202,7 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param) cfi->cfiq->BufWriteTimeoutMax = 0; } -static void fixup_at49bv640dx_lock(struct mtd_info *mtd, void *param) +static void fixup_at49bv640dx_lock(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -214,7 +214,7 @@ static void fixup_at49bv640dx_lock(struct mtd_info *mtd, void *param) #ifdef CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE /* Some Intel Strata Flash prior to FPO revision C has bugs in this area */ -static void fixup_intel_strataflash(struct mtd_info *mtd, void* param) +static void fixup_intel_strataflash(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -227,7 +227,7 @@ static void fixup_intel_strataflash(struct mtd_info *mtd, void* param) #endif #ifdef CMDSET0001_DISABLE_WRITE_SUSPEND -static void fixup_no_write_suspend(struct mtd_info *mtd, void* param) +static void fixup_no_write_suspend(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -240,7 +240,7 @@ static void fixup_no_write_suspend(struct mtd_info *mtd, void* param) } #endif -static void fixup_st_m28w320ct(struct mtd_info *mtd, void* param) +static void fixup_st_m28w320ct(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -249,7 +249,7 @@ static void fixup_st_m28w320ct(struct mtd_info *mtd, void* param) cfi->cfiq->BufWriteTimeoutMax = 0; /* Not supported */ } -static void fixup_st_m28w320cb(struct mtd_info *mtd, void* param) +static void fixup_st_m28w320cb(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -259,7 +259,7 @@ static void fixup_st_m28w320cb(struct mtd_info *mtd, void* param) (cfi->cfiq->EraseRegionInfo[1] & 0xffff0000) | 0x3e; }; -static void fixup_use_point(struct mtd_info *mtd, void *param) +static void fixup_use_point(struct mtd_info *mtd) { struct map_info *map = mtd->priv; if (!mtd->point && map_is_linear(map)) { @@ -268,7 +268,7 @@ static void fixup_use_point(struct mtd_info *mtd, void *param) } } -static void fixup_use_write_buffers(struct mtd_info *mtd, void *param) +static void fixup_use_write_buffers(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -282,7 +282,7 @@ static void fixup_use_write_buffers(struct mtd_info *mtd, void *param) /* * Some chips power-up with all sectors locked by default. */ -static void fixup_unlock_powerup_lock(struct mtd_info *mtd, void *param) +static void fixup_unlock_powerup_lock(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -295,31 +295,31 @@ static void fixup_unlock_powerup_lock(struct mtd_info *mtd, void *param) } static struct cfi_fixup cfi_fixup_table[] = { - { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL }, - { CFI_MFR_ATMEL, AT49BV640D, fixup_at49bv640dx_lock, NULL }, - { CFI_MFR_ATMEL, AT49BV640DT, fixup_at49bv640dx_lock, NULL }, + { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri }, + { CFI_MFR_ATMEL, AT49BV640D, fixup_at49bv640dx_lock }, + { CFI_MFR_ATMEL, AT49BV640DT, fixup_at49bv640dx_lock }, #ifdef CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE - { CFI_MFR_ANY, CFI_ID_ANY, fixup_intel_strataflash, NULL }, + { CFI_MFR_ANY, CFI_ID_ANY, fixup_intel_strataflash }, #endif #ifdef CMDSET0001_DISABLE_WRITE_SUSPEND - { CFI_MFR_ANY, CFI_ID_ANY, fixup_no_write_suspend, NULL }, + { CFI_MFR_ANY, CFI_ID_ANY, fixup_no_write_suspend }, #endif #if !FORCE_WORD_WRITE - { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL }, + { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers }, #endif - { CFI_MFR_ST, 0x00ba, /* M28W320CT */ fixup_st_m28w320ct, NULL }, - { CFI_MFR_ST, 0x00bb, /* M28W320CB */ fixup_st_m28w320cb, NULL }, - { CFI_MFR_INTEL, CFI_ID_ANY, fixup_unlock_powerup_lock, NULL, }, - { 0, 0, NULL, NULL } + { CFI_MFR_ST, 0x00ba, /* M28W320CT */ fixup_st_m28w320ct }, + { CFI_MFR_ST, 0x00bb, /* M28W320CB */ fixup_st_m28w320cb }, + { CFI_MFR_INTEL, CFI_ID_ANY, fixup_unlock_powerup_lock }, + { 0, 0, NULL } }; static struct cfi_fixup jedec_fixup_table[] = { - { CFI_MFR_INTEL, I82802AB, fixup_use_fwh_lock, NULL, }, - { CFI_MFR_INTEL, I82802AC, fixup_use_fwh_lock, NULL, }, - { CFI_MFR_ST, M50LPW080, fixup_use_fwh_lock, NULL, }, - { CFI_MFR_ST, M50FLW080A, fixup_use_fwh_lock, NULL, }, - { CFI_MFR_ST, M50FLW080B, fixup_use_fwh_lock, NULL, }, - { 0, 0, NULL, NULL } + { CFI_MFR_INTEL, I82802AB, fixup_use_fwh_lock }, + { CFI_MFR_INTEL, I82802AC, fixup_use_fwh_lock }, + { CFI_MFR_ST, M50LPW080, fixup_use_fwh_lock }, + { CFI_MFR_ST, M50FLW080A, fixup_use_fwh_lock }, + { CFI_MFR_ST, M50FLW080B, fixup_use_fwh_lock }, + { 0, 0, NULL } }; static struct cfi_fixup fixup_table[] = { /* The CFI vendor ids and the JEDEC vendor IDs appear @@ -327,8 +327,8 @@ static struct cfi_fixup fixup_table[] = { * well. This table is to pick all cases where * we know that is the case. */ - { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_point, NULL }, - { 0, 0, NULL, NULL } + { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_point }, + { 0, 0, NULL } }; static void cfi_fixup_major_minor(struct cfi_private *cfi, @@ -455,6 +455,7 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary) mtd->flags = MTD_CAP_NORFLASH; mtd->name = map->name; mtd->writesize = 1; + mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize; mtd->reboot_notifier.notifier_call = cfi_intelext_reboot; diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 3b8e32d87977..f072fcfde04e 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -134,7 +134,7 @@ static void cfi_tell_features(struct cfi_pri_amdstd *extp) #ifdef AMD_BOOTLOC_BUG /* Wheee. Bring me the head of someone at AMD. */ -static void fixup_amd_bootblock(struct mtd_info *mtd, void* param) +static void fixup_amd_bootblock(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -186,7 +186,7 @@ static void fixup_amd_bootblock(struct mtd_info *mtd, void* param) } #endif -static void fixup_use_write_buffers(struct mtd_info *mtd, void *param) +static void fixup_use_write_buffers(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -197,7 +197,7 @@ static void fixup_use_write_buffers(struct mtd_info *mtd, void *param) } /* Atmel chips don't use the same PRI format as AMD chips */ -static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param) +static void fixup_convert_atmel_pri(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -228,14 +228,14 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param) cfi->cfiq->BufWriteTimeoutMax = 0; } -static void fixup_use_secsi(struct mtd_info *mtd, void *param) +static void fixup_use_secsi(struct mtd_info *mtd) { /* Setup for chips with a secsi area */ mtd->read_user_prot_reg = cfi_amdstd_secsi_read; mtd->read_fact_prot_reg = cfi_amdstd_secsi_read; } -static void fixup_use_erase_chip(struct mtd_info *mtd, void *param) +static void fixup_use_erase_chip(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -250,7 +250,7 @@ static void fixup_use_erase_chip(struct mtd_info *mtd, void *param) * Some Atmel chips (e.g. the AT49BV6416) power-up with all sectors * locked by default. */ -static void fixup_use_atmel_lock(struct mtd_info *mtd, void *param) +static void fixup_use_atmel_lock(struct mtd_info *mtd) { mtd->lock = cfi_atmel_lock; mtd->unlock = cfi_atmel_unlock; @@ -271,7 +271,7 @@ static void fixup_old_sst_eraseregion(struct mtd_info *mtd) cfi->cfiq->NumEraseRegions = 1; } -static void fixup_sst39vf(struct mtd_info *mtd, void *param) +static void fixup_sst39vf(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -282,7 +282,7 @@ static void fixup_sst39vf(struct mtd_info *mtd, void *param) cfi->addr_unlock2 = 0x2AAA; } -static void fixup_sst39vf_rev_b(struct mtd_info *mtd, void *param) +static void fixup_sst39vf_rev_b(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -295,12 +295,12 @@ static void fixup_sst39vf_rev_b(struct mtd_info *mtd, void *param) cfi->sector_erase_cmd = CMD(0x50); } -static void fixup_sst38vf640x_sectorsize(struct mtd_info *mtd, void *param) +static void fixup_sst38vf640x_sectorsize(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; - fixup_sst39vf_rev_b(mtd, param); + fixup_sst39vf_rev_b(mtd); /* * CFI reports 1024 sectors (0x03ff+1) of 64KBytes (0x0100*256) where @@ -310,7 +310,7 @@ static void fixup_sst38vf640x_sectorsize(struct mtd_info *mtd, void *param) pr_warning("%s: Bad 38VF640x CFI data; adjusting sector size from 64 to 8KiB\n", mtd->name); } -static void fixup_s29gl064n_sectors(struct mtd_info *mtd, void *param) +static void fixup_s29gl064n_sectors(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -321,7 +321,7 @@ static void fixup_s29gl064n_sectors(struct mtd_info *mtd, void *param) } } -static void fixup_s29gl032n_sectors(struct mtd_info *mtd, void *param) +static void fixup_s29gl032n_sectors(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; @@ -334,47 +334,47 @@ static void fixup_s29gl032n_sectors(struct mtd_info *mtd, void *param) /* Used to fix CFI-Tables of chips without Extended Query Tables */ static struct cfi_fixup cfi_nopri_fixup_table[] = { - { CFI_MFR_SST, 0x234A, fixup_sst39vf, NULL, }, /* SST39VF1602 */ - { CFI_MFR_SST, 0x234B, fixup_sst39vf, NULL, }, /* SST39VF1601 */ - { CFI_MFR_SST, 0x235A, fixup_sst39vf, NULL, }, /* SST39VF3202 */ - { CFI_MFR_SST, 0x235B, fixup_sst39vf, NULL, }, /* SST39VF3201 */ - { CFI_MFR_SST, 0x235C, fixup_sst39vf_rev_b, NULL, }, /* SST39VF3202B */ - { CFI_MFR_SST, 0x235D, fixup_sst39vf_rev_b, NULL, }, /* SST39VF3201B */ - { CFI_MFR_SST, 0x236C, fixup_sst39vf_rev_b, NULL, }, /* SST39VF6402B */ - { CFI_MFR_SST, 0x236D, fixup_sst39vf_rev_b, NULL, }, /* SST39VF6401B */ - { 0, 0, NULL, NULL } + { CFI_MFR_SST, 0x234a, fixup_sst39vf }, /* SST39VF1602 */ + { CFI_MFR_SST, 0x234b, fixup_sst39vf }, /* SST39VF1601 */ + { CFI_MFR_SST, 0x235a, fixup_sst39vf }, /* SST39VF3202 */ + { CFI_MFR_SST, 0x235b, fixup_sst39vf }, /* SST39VF3201 */ + { CFI_MFR_SST, 0x235c, fixup_sst39vf_rev_b }, /* SST39VF3202B */ + { CFI_MFR_SST, 0x235d, fixup_sst39vf_rev_b }, /* SST39VF3201B */ + { CFI_MFR_SST, 0x236c, fixup_sst39vf_rev_b }, /* SST39VF6402B */ + { CFI_MFR_SST, 0x236d, fixup_sst39vf_rev_b }, /* SST39VF6401B */ + { 0, 0, NULL } }; static struct cfi_fixup cfi_fixup_table[] = { - { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL }, + { CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri }, #ifdef AMD_BOOTLOC_BUG - { CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL }, - { CFI_MFR_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock, NULL }, + { CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock }, + { CFI_MFR_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock }, #endif - { CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, }, - { CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, }, - { CFI_MFR_AMD, 0x0055, fixup_use_secsi, NULL, }, - { CFI_MFR_AMD, 0x0056, fixup_use_secsi, NULL, }, - { CFI_MFR_AMD, 0x005C, fixup_use_secsi, NULL, }, - { CFI_MFR_AMD, 0x005F, fixup_use_secsi, NULL, }, - { CFI_MFR_AMD, 0x0c01, fixup_s29gl064n_sectors, NULL, }, - { CFI_MFR_AMD, 0x1301, fixup_s29gl064n_sectors, NULL, }, - { CFI_MFR_AMD, 0x1a00, fixup_s29gl032n_sectors, NULL, }, - { CFI_MFR_AMD, 0x1a01, fixup_s29gl032n_sectors, NULL, }, - { CFI_MFR_SST, 0x536A, fixup_sst38vf640x_sectorsize, NULL, }, /* SST38VF6402 */ - { CFI_MFR_SST, 0x536B, fixup_sst38vf640x_sectorsize, NULL, }, /* SST38VF6401 */ - { CFI_MFR_SST, 0x536C, fixup_sst38vf640x_sectorsize, NULL, }, /* SST38VF6404 */ - { CFI_MFR_SST, 0x536D, fixup_sst38vf640x_sectorsize, NULL, }, /* SST38VF6403 */ + { CFI_MFR_AMD, 0x0050, fixup_use_secsi }, + { CFI_MFR_AMD, 0x0053, fixup_use_secsi }, + { CFI_MFR_AMD, 0x0055, fixup_use_secsi }, + { CFI_MFR_AMD, 0x0056, fixup_use_secsi }, + { CFI_MFR_AMD, 0x005C, fixup_use_secsi }, + { CFI_MFR_AMD, 0x005F, fixup_use_secsi }, + { CFI_MFR_AMD, 0x0c01, fixup_s29gl064n_sectors }, + { CFI_MFR_AMD, 0x1301, fixup_s29gl064n_sectors }, + { CFI_MFR_AMD, 0x1a00, fixup_s29gl032n_sectors }, + { CFI_MFR_AMD, 0x1a01, fixup_s29gl032n_sectors }, + { CFI_MFR_SST, 0x536a, fixup_sst38vf640x_sectorsize }, /* SST38VF6402 */ + { CFI_MFR_SST, 0x536b, fixup_sst38vf640x_sectorsize }, /* SST38VF6401 */ + { CFI_MFR_SST, 0x536c, fixup_sst38vf640x_sectorsize }, /* SST38VF6404 */ + { CFI_MFR_SST, 0x536d, fixup_sst38vf640x_sectorsize }, /* SST38VF6403 */ #if !FORCE_WORD_WRITE - { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL, }, + { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers }, #endif - { 0, 0, NULL, NULL } + { 0, 0, NULL } }; static struct cfi_fixup jedec_fixup_table[] = { - { CFI_MFR_SST, SST49LF004B, fixup_use_fwh_lock, NULL, }, - { CFI_MFR_SST, SST49LF040B, fixup_use_fwh_lock, NULL, }, - { CFI_MFR_SST, SST49LF008A, fixup_use_fwh_lock, NULL, }, - { 0, 0, NULL, NULL } + { CFI_MFR_SST, SST49LF004B, fixup_use_fwh_lock }, + { CFI_MFR_SST, SST49LF040B, fixup_use_fwh_lock }, + { CFI_MFR_SST, SST49LF008A, fixup_use_fwh_lock }, + { 0, 0, NULL } }; static struct cfi_fixup fixup_table[] = { @@ -383,18 +383,30 @@ static struct cfi_fixup fixup_table[] = { * well. This table is to pick all cases where * we know that is the case. */ - { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_erase_chip, NULL }, - { CFI_MFR_ATMEL, AT49BV6416, fixup_use_atmel_lock, NULL }, - { 0, 0, NULL, NULL } + { CFI_MFR_ANY, CFI_ID_ANY, fixup_use_erase_chip }, + { CFI_MFR_ATMEL, AT49BV6416, fixup_use_atmel_lock }, + { 0, 0, NULL } }; static void cfi_fixup_major_minor(struct cfi_private *cfi, struct cfi_pri_amdstd *extp) { - if (cfi->mfr == CFI_MFR_SAMSUNG && cfi->id == 0x257e && - extp->MajorVersion == '0') - extp->MajorVersion = '1'; + if (cfi->mfr == CFI_MFR_SAMSUNG) { + if ((extp->MajorVersion == '0' && extp->MinorVersion == '0') || + (extp->MajorVersion == '3' && extp->MinorVersion == '3')) { + /* + * Samsung K8P2815UQB and K8D6x16UxM chips + * report major=0 / minor=0. + * K8D3x16UxC chips report major=3 / minor=3. + */ + printk(KERN_NOTICE " Fixing Samsung's Amd/Fujitsu" + " Extended Query version to 1.%c\n", + extp->MinorVersion); + extp->MajorVersion = '1'; + } + } + /* * SST 38VF640x chips report major=0xFF / minor=0xFF. */ @@ -428,6 +440,10 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) mtd->flags = MTD_CAP_NORFLASH; mtd->name = map->name; mtd->writesize = 1; + mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize; + + DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): write buffer size %d\n", + __func__, mtd->writebufsize); mtd->reboot_notifier.notifier_call = cfi_amdstd_reboot; diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c index 314af1f5a370..c04b7658abe9 100644 --- a/drivers/mtd/chips/cfi_cmdset_0020.c +++ b/drivers/mtd/chips/cfi_cmdset_0020.c @@ -238,6 +238,7 @@ static struct mtd_info *cfi_staa_setup(struct map_info *map) mtd->resume = cfi_staa_resume; mtd->flags = MTD_CAP_NORFLASH & ~MTD_BIT_WRITEABLE; mtd->writesize = 8; /* FIXME: Should be 0 for STMicro flashes w/out ECC */ + mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize; map->fldrv = &cfi_staa_chipdrv; __module_get(THIS_MODULE); mtd->name = map->name; diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c index 360525c637d2..6ae3d111e1e7 100644 --- a/drivers/mtd/chips/cfi_util.c +++ b/drivers/mtd/chips/cfi_util.c @@ -156,7 +156,7 @@ void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup *fixups) for (f=fixups; f->fixup; f++) { if (((f->mfr == CFI_MFR_ANY) || (f->mfr == cfi->mfr)) && ((f->id == CFI_ID_ANY) || (f->id == cfi->id))) { - f->fixup(mtd, f->param); + f->fixup(mtd); } } } diff --git a/drivers/mtd/chips/fwh_lock.h b/drivers/mtd/chips/fwh_lock.h index d18064977192..5e3cc80128aa 100644 --- a/drivers/mtd/chips/fwh_lock.h +++ b/drivers/mtd/chips/fwh_lock.h @@ -98,7 +98,7 @@ static int fwh_unlock_varsize(struct mtd_info *mtd, loff_t ofs, uint64_t len) return ret; } -static void fixup_use_fwh_lock(struct mtd_info *mtd, void *param) +static void fixup_use_fwh_lock(struct mtd_info *mtd) { printk(KERN_NOTICE "using fwh lock/unlock method\n"); /* Setup for the chips with the fwh lock method */ diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index bf5a002209bd..e4eba6cc1b2e 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -51,6 +51,10 @@ #define OPCODE_WRDI 0x04 /* Write disable */ #define OPCODE_AAI_WP 0xad /* Auto address increment word program */ +/* Used for Macronix flashes only. */ +#define OPCODE_EN4B 0xb7 /* Enter 4-byte mode */ +#define OPCODE_EX4B 0xe9 /* Exit 4-byte mode */ + /* Status Register bits. */ #define SR_WIP 1 /* Write in progress */ #define SR_WEL 2 /* Write enable latch */ @@ -62,7 +66,7 @@ /* Define max times to check status register before we give up. */ #define MAX_READY_WAIT_JIFFIES (40 * HZ) /* M25P16 specs 40s max chip erase */ -#define MAX_CMD_SIZE 4 +#define MAX_CMD_SIZE 5 #ifdef CONFIG_M25PXX_USE_FAST_READ #define OPCODE_READ OPCODE_FAST_READ @@ -152,6 +156,16 @@ static inline int write_disable(struct m25p *flash) } /* + * Enable/disable 4-byte addressing mode. + */ +static inline int set_4byte(struct m25p *flash, int enable) +{ + u8 code = enable ? OPCODE_EN4B : OPCODE_EX4B; + + return spi_write_then_read(flash->spi, &code, 1, NULL, 0); +} + +/* * Service routine to read status register until ready, or timeout occurs. * Returns non-zero if error. */ @@ -207,6 +221,7 @@ static void m25p_addr2cmd(struct m25p *flash, unsigned int addr, u8 *cmd) cmd[1] = addr >> (flash->addr_width * 8 - 8); cmd[2] = addr >> (flash->addr_width * 8 - 16); cmd[3] = addr >> (flash->addr_width * 8 - 24); + cmd[4] = addr >> (flash->addr_width * 8 - 32); } static int m25p_cmdsz(struct m25p *flash) @@ -482,6 +497,10 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len, size_t actual; int cmd_sz, ret; + DEBUG(MTD_DEBUG_LEVEL2, "%s: %s %s 0x%08x, len %zd\n", + dev_name(&flash->spi->dev), __func__, "to", + (u32)to, len); + *retlen = 0; /* sanity checks */ @@ -607,7 +626,6 @@ struct flash_info { .sector_size = (_sector_size), \ .n_sectors = (_n_sectors), \ .page_size = 256, \ - .addr_width = 3, \ .flags = (_flags), \ }) @@ -635,7 +653,7 @@ static const struct spi_device_id m25p_ids[] = { { "at26f004", INFO(0x1f0400, 0, 64 * 1024, 8, SECT_4K) }, { "at26df081a", INFO(0x1f4501, 0, 64 * 1024, 16, SECT_4K) }, { "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) }, - { "at26df321", INFO(0x1f4701, 0, 64 * 1024, 64, SECT_4K) }, + { "at26df321", INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) }, /* EON -- en25pxx */ { "en25p32", INFO(0x1c2016, 0, 64 * 1024, 64, 0) }, @@ -653,6 +671,8 @@ static const struct spi_device_id m25p_ids[] = { { "mx25l6405d", INFO(0xc22017, 0, 64 * 1024, 128, 0) }, { "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) }, { "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) }, + { "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) }, + { "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) }, /* Spansion -- single (large) sector size only, at least * for the chips listed here (without boot sectors). @@ -764,6 +784,7 @@ static const struct spi_device_id *__devinit jedec_probe(struct spi_device *spi) return &m25p_ids[tmp]; } } + dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec); return ERR_PTR(-ENODEV); } @@ -883,7 +904,17 @@ static int __devinit m25p_probe(struct spi_device *spi) flash->mtd.dev.parent = &spi->dev; flash->page_size = info->page_size; - flash->addr_width = info->addr_width; + + if (info->addr_width) + flash->addr_width = info->addr_width; + else { + /* enable 4-byte addressing if the device exceeds 16MiB */ + if (flash->mtd.size > 0x1000000) { + flash->addr_width = 4; + set_4byte(flash, 1); + } else + flash->addr_width = 3; + } dev_info(&spi->dev, "%s (%lld Kbytes)\n", id->name, (long long)flash->mtd.size >> 10); diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c index 684247a8a5ed..c163e619abc9 100644 --- a/drivers/mtd/devices/sst25l.c +++ b/drivers/mtd/devices/sst25l.c @@ -335,7 +335,7 @@ out: return ret; } -static struct flash_info *__init sst25l_match_device(struct spi_device *spi) +static struct flash_info *__devinit sst25l_match_device(struct spi_device *spi) { struct flash_info *flash_info = NULL; struct spi_message m; @@ -375,7 +375,7 @@ static struct flash_info *__init sst25l_match_device(struct spi_device *spi) return flash_info; } -static int __init sst25l_probe(struct spi_device *spi) +static int __devinit sst25l_probe(struct spi_device *spi) { struct flash_info *flash_info; struct sst25l_flash *flash; diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c index 19fe92db0c46..77d64ce19e9f 100644 --- a/drivers/mtd/maps/amd76xrom.c +++ b/drivers/mtd/maps/amd76xrom.c @@ -149,11 +149,8 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev, if (request_resource(&iomem_resource, &window->rsrc)) { window->rsrc.parent = NULL; printk(KERN_ERR MOD_NAME - " %s(): Unable to register resource" - " 0x%.16llx-0x%.16llx - kernel bug?\n", - __func__, - (unsigned long long)window->rsrc.start, - (unsigned long long)window->rsrc.end); + " %s(): Unable to register resource %pR - kernel bug?\n", + __func__, &window->rsrc); } diff --git a/drivers/mtd/maps/bcm963xx-flash.c b/drivers/mtd/maps/bcm963xx-flash.c index d175c120ee84..1f3049590d9e 100644 --- a/drivers/mtd/maps/bcm963xx-flash.c +++ b/drivers/mtd/maps/bcm963xx-flash.c @@ -196,10 +196,15 @@ static int bcm963xx_probe(struct platform_device *pdev) bcm963xx_mtd_info = do_map_probe("cfi_probe", &bcm963xx_map); if (!bcm963xx_mtd_info) { dev_err(&pdev->dev, "failed to probe using CFI\n"); + bcm963xx_mtd_info = do_map_probe("jedec_probe", &bcm963xx_map); + if (bcm963xx_mtd_info) + goto probe_ok; + dev_err(&pdev->dev, "failed to probe using JEDEC\n"); err = -EIO; goto err_probe; } +probe_ok: bcm963xx_mtd_info->owner = THIS_MODULE; /* This is mutually exclusive */ diff --git a/drivers/mtd/maps/ck804xrom.c b/drivers/mtd/maps/ck804xrom.c index ddb462bea9b5..5fdb7b26cea3 100644 --- a/drivers/mtd/maps/ck804xrom.c +++ b/drivers/mtd/maps/ck804xrom.c @@ -178,11 +178,8 @@ static int __devinit ck804xrom_init_one (struct pci_dev *pdev, if (request_resource(&iomem_resource, &window->rsrc)) { window->rsrc.parent = NULL; printk(KERN_ERR MOD_NAME - " %s(): Unable to register resource" - " 0x%.016llx-0x%.016llx - kernel bug?\n", - __func__, - (unsigned long long)window->rsrc.start, - (unsigned long long)window->rsrc.end); + " %s(): Unable to register resource %pR - kernel bug?\n", + __func__, &window->rsrc); } diff --git a/drivers/mtd/maps/esb2rom.c b/drivers/mtd/maps/esb2rom.c index d12c93dc1aad..4feb7507ab7c 100644 --- a/drivers/mtd/maps/esb2rom.c +++ b/drivers/mtd/maps/esb2rom.c @@ -242,12 +242,9 @@ static int __devinit esb2rom_init_one(struct pci_dev *pdev, window->rsrc.flags = IORESOURCE_MEM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, &window->rsrc)) { window->rsrc.parent = NULL; - printk(KERN_DEBUG MOD_NAME - ": %s(): Unable to register resource" - " 0x%.08llx-0x%.08llx - kernel bug?\n", - __func__, - (unsigned long long)window->rsrc.start, - (unsigned long long)window->rsrc.end); + printk(KERN_DEBUG MOD_NAME ": " + "%s(): Unable to register resource %pR - kernel bug?\n", + __func__, &window->rsrc); } /* Map the firmware hub into my address space. */ diff --git a/drivers/mtd/maps/ichxrom.c b/drivers/mtd/maps/ichxrom.c index f102bf243a74..1337a4191a0c 100644 --- a/drivers/mtd/maps/ichxrom.c +++ b/drivers/mtd/maps/ichxrom.c @@ -175,12 +175,9 @@ static int __devinit ichxrom_init_one (struct pci_dev *pdev, window->rsrc.flags = IORESOURCE_MEM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, &window->rsrc)) { window->rsrc.parent = NULL; - printk(KERN_DEBUG MOD_NAME - ": %s(): Unable to register resource" - " 0x%.16llx-0x%.16llx - kernel bug?\n", - __func__, - (unsigned long long)window->rsrc.start, - (unsigned long long)window->rsrc.end); + printk(KERN_DEBUG MOD_NAME ": " + "%s(): Unable to register resource %pR - kernel bug?\n", + __func__, &window->rsrc); } /* Map the firmware hub into my address space. */ diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index 9861814aa027..8506578e6a35 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -274,9 +274,7 @@ static int __devinit of_flash_probe(struct platform_device *dev, continue; } - dev_dbg(&dev->dev, "of_flash device: %.8llx-%.8llx\n", - (unsigned long long)res.start, - (unsigned long long)res.end); + dev_dbg(&dev->dev, "of_flash device: %pR\n", &res); err = -EBUSY; res_size = resource_size(&res); diff --git a/drivers/mtd/maps/scx200_docflash.c b/drivers/mtd/maps/scx200_docflash.c index b5391ebb736e..027e628a4f1d 100644 --- a/drivers/mtd/maps/scx200_docflash.c +++ b/drivers/mtd/maps/scx200_docflash.c @@ -166,9 +166,8 @@ static int __init init_scx200_docflash(void) outl(pmr, scx200_cb_base + SCx200_PMR); } - printk(KERN_INFO NAME ": DOCCS mapped at 0x%llx-0x%llx, width %d\n", - (unsigned long long)docmem.start, - (unsigned long long)docmem.end, width); + printk(KERN_INFO NAME ": DOCCS mapped at %pR, width %d\n", + &docmem, width); scx200_docflash_map.size = size; if (width == 8) diff --git a/drivers/mtd/maps/tqm8xxl.c b/drivers/mtd/maps/tqm8xxl.c index 60146984f4be..c08e140d40ed 100644 --- a/drivers/mtd/maps/tqm8xxl.c +++ b/drivers/mtd/maps/tqm8xxl.c @@ -139,7 +139,7 @@ static int __init init_tqm_mtd(void) goto error_mem; } - map_banks[idx]->name = (char *)kmalloc(16, GFP_KERNEL); + map_banks[idx]->name = kmalloc(16, GFP_KERNEL); if (!map_banks[idx]->name) { ret = -ENOMEM; diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 98240575a18d..145b3d0dc0db 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -522,10 +522,6 @@ static int mtd_blkpg_ioctl(struct mtd_info *mtd, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - /* Only master mtd device must be used to control partitions */ - if (!mtd_is_master(mtd)) - return -EINVAL; - if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg))) return -EFAULT; @@ -535,6 +531,10 @@ static int mtd_blkpg_ioctl(struct mtd_info *mtd, switch (a.op) { case BLKPG_ADD_PARTITION: + /* Only master mtd device must be used to add partitions */ + if (mtd_is_partition(mtd)) + return -EINVAL; + return mtd_add_partition(mtd, p.devname, p.start, p.length); case BLKPG_DEL_PARTITION: @@ -601,6 +601,7 @@ static int mtd_ioctl(struct file *file, u_int cmd, u_long arg) } case MEMGETINFO: + memset(&info, 0, sizeof(info)); info.type = mtd->type; info.flags = mtd->flags; info.size = mtd->size; @@ -609,7 +610,6 @@ static int mtd_ioctl(struct file *file, u_int cmd, u_long arg) info.oobsize = mtd->oobsize; /* The below fields are obsolete */ info.ecctype = -1; - info.eccsize = 0; if (copy_to_user(argp, &info, sizeof(struct mtd_info_user))) return -EFAULT; break; diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index bf8de0943103..5f5777bd3f75 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -776,6 +776,7 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c concat->mtd.size = subdev[0]->size; concat->mtd.erasesize = subdev[0]->erasesize; concat->mtd.writesize = subdev[0]->writesize; + concat->mtd.writebufsize = subdev[0]->writebufsize; concat->mtd.subpage_sft = subdev[0]->subpage_sft; concat->mtd.oobsize = subdev[0]->oobsize; concat->mtd.oobavail = subdev[0]->oobavail; diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index c948150079be..e3e40f440323 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -401,7 +401,8 @@ static void mtdoops_notify_remove(struct mtd_info *mtd) printk(KERN_WARNING "mtdoops: could not unregister kmsg_dumper\n"); cxt->mtd = NULL; - flush_scheduled_work(); + flush_work_sync(&cxt->work_erase); + flush_work_sync(&cxt->work_write); } diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 79e3689f1e16..0a4760174782 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -120,8 +120,25 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from, return -EINVAL; if (ops->datbuf && from + ops->len > mtd->size) return -EINVAL; - res = part->master->read_oob(part->master, from + part->offset, ops); + /* + * If OOB is also requested, make sure that we do not read past the end + * of this partition. + */ + if (ops->oobbuf) { + size_t len, pages; + + if (ops->mode == MTD_OOB_AUTO) + len = mtd->oobavail; + else + len = mtd->oobsize; + pages = mtd_div_by_ws(mtd->size, mtd); + pages -= mtd_div_by_ws(from, mtd); + if (ops->ooboffs + ops->ooblen > pages * len) + return -EINVAL; + } + + res = part->master->read_oob(part->master, from + part->offset, ops); if (unlikely(res)) { if (res == -EUCLEAN) mtd->ecc_stats.corrected++; @@ -384,6 +401,7 @@ static struct mtd_part *allocate_partition(struct mtd_info *master, slave->mtd.flags = master->flags & ~part->mask_flags; slave->mtd.size = part->size; slave->mtd.writesize = master->writesize; + slave->mtd.writebufsize = master->writebufsize; slave->mtd.oobsize = master->oobsize; slave->mtd.oobavail = master->oobavail; slave->mtd.subpage_sft = master->subpage_sft; @@ -720,19 +738,19 @@ int parse_mtd_partitions(struct mtd_info *master, const char **types, } EXPORT_SYMBOL_GPL(parse_mtd_partitions); -int mtd_is_master(struct mtd_info *mtd) +int mtd_is_partition(struct mtd_info *mtd) { struct mtd_part *part; - int nopart = 0; + int ispart = 0; mutex_lock(&mtd_partitions_mutex); list_for_each_entry(part, &mtd_partitions, list) if (&part->mtd == mtd) { - nopart = 1; + ispart = 1; break; } mutex_unlock(&mtd_partitions_mutex); - return nopart; + return ispart; } -EXPORT_SYMBOL_GPL(mtd_is_master); +EXPORT_SYMBOL_GPL(mtd_is_partition); diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 8229802b4346..c89592239bc7 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -96,6 +96,7 @@ config MTD_NAND_SPIA config MTD_NAND_AMS_DELTA tristate "NAND Flash device on Amstrad E3" depends on MACH_AMS_DELTA + default y help Support for NAND flash on Amstrad E3 (Delta). diff --git a/drivers/mtd/nand/ams-delta.c b/drivers/mtd/nand/ams-delta.c index 2548e1065bf8..a067d090cb31 100644 --- a/drivers/mtd/nand/ams-delta.c +++ b/drivers/mtd/nand/ams-delta.c @@ -4,6 +4,8 @@ * Copyright (C) 2006 Jonathan McDowell <noodles@earth.li> * * Derived from drivers/mtd/toto.c + * Converted to platform driver by Janusz Krzysztofik <jkrzyszt@tis.icnet.pl> + * Partially stolen from drivers/mtd/nand/plat_nand.c * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -62,9 +64,10 @@ static struct mtd_partition partition_info[] = { static void ams_delta_write_byte(struct mtd_info *mtd, u_char byte) { struct nand_chip *this = mtd->priv; + void __iomem *io_base = this->priv; - omap_writew(0, (OMAP1_MPUIO_BASE + OMAP_MPUIO_IO_CNTL)); - omap_writew(byte, this->IO_ADDR_W); + writew(0, io_base + OMAP_MPUIO_IO_CNTL); + writew(byte, this->IO_ADDR_W); ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_NWE, 0); ndelay(40); ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_NWE, @@ -75,11 +78,12 @@ static u_char ams_delta_read_byte(struct mtd_info *mtd) { u_char res; struct nand_chip *this = mtd->priv; + void __iomem *io_base = this->priv; ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_NRE, 0); ndelay(40); - omap_writew(~0, (OMAP1_MPUIO_BASE + OMAP_MPUIO_IO_CNTL)); - res = omap_readw(this->IO_ADDR_R); + writew(~0, io_base + OMAP_MPUIO_IO_CNTL); + res = readw(this->IO_ADDR_R); ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_NRE, AMS_DELTA_LATCH2_NAND_NRE); @@ -151,11 +155,16 @@ static int ams_delta_nand_ready(struct mtd_info *mtd) /* * Main initialization routine */ -static int __init ams_delta_init(void) +static int __devinit ams_delta_init(struct platform_device *pdev) { struct nand_chip *this; + struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + void __iomem *io_base; int err = 0; + if (!res) + return -ENXIO; + /* Allocate memory for MTD device structure and private data */ ams_delta_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL); @@ -177,9 +186,25 @@ static int __init ams_delta_init(void) /* Link the private data with the MTD structure */ ams_delta_mtd->priv = this; + if (!request_mem_region(res->start, resource_size(res), + dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "request_mem_region failed\n"); + err = -EBUSY; + goto out_free; + } + + io_base = ioremap(res->start, resource_size(res)); + if (io_base == NULL) { + dev_err(&pdev->dev, "ioremap failed\n"); + err = -EIO; + goto out_release_io; + } + + this->priv = io_base; + /* Set address of NAND IO lines */ - this->IO_ADDR_R = (OMAP1_MPUIO_BASE + OMAP_MPUIO_INPUT_LATCH); - this->IO_ADDR_W = (OMAP1_MPUIO_BASE + OMAP_MPUIO_OUTPUT); + this->IO_ADDR_R = io_base + OMAP_MPUIO_INPUT_LATCH; + this->IO_ADDR_W = io_base + OMAP_MPUIO_OUTPUT; this->read_byte = ams_delta_read_byte; this->write_buf = ams_delta_write_buf; this->read_buf = ams_delta_read_buf; @@ -195,6 +220,8 @@ static int __init ams_delta_init(void) this->chip_delay = 30; this->ecc.mode = NAND_ECC_SOFT; + platform_set_drvdata(pdev, io_base); + /* Set chip enabled, but */ ams_delta_latch2_write(NAND_MASK, AMS_DELTA_LATCH2_NAND_NRE | AMS_DELTA_LATCH2_NAND_NWE | @@ -214,25 +241,56 @@ static int __init ams_delta_init(void) goto out; out_mtd: + platform_set_drvdata(pdev, NULL); + iounmap(io_base); +out_release_io: + release_mem_region(res->start, resource_size(res)); +out_free: kfree(ams_delta_mtd); out: return err; } -module_init(ams_delta_init); - /* * Clean up routine */ -static void __exit ams_delta_cleanup(void) +static int __devexit ams_delta_cleanup(struct platform_device *pdev) { + void __iomem *io_base = platform_get_drvdata(pdev); + struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + /* Release resources, unregister device */ nand_release(ams_delta_mtd); + iounmap(io_base); + release_mem_region(res->start, resource_size(res)); + /* Free the MTD device structure */ kfree(ams_delta_mtd); + + return 0; +} + +static struct platform_driver ams_delta_nand_driver = { + .probe = ams_delta_init, + .remove = __devexit_p(ams_delta_cleanup), + .driver = { + .name = "ams-delta-nand", + .owner = THIS_MODULE, + }, +}; + +static int __init ams_delta_nand_init(void) +{ + return platform_driver_register(&ams_delta_nand_driver); +} +module_init(ams_delta_nand_init); + +static void __exit ams_delta_nand_exit(void) +{ + platform_driver_unregister(&ams_delta_nand_driver); } -module_exit(ams_delta_cleanup); +module_exit(ams_delta_nand_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jonathan McDowell <noodles@earth.li>"); diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c index c141b07b25d1..7a13d42cbabd 100644 --- a/drivers/mtd/nand/fsl_elbc_nand.c +++ b/drivers/mtd/nand/fsl_elbc_nand.c @@ -388,6 +388,8 @@ static void fsl_elbc_cmdfunc(struct mtd_info *mtd, unsigned int command, "page_addr: 0x%x, column: 0x%x.\n", page_addr, column); + elbc_fcm_ctrl->column = column; + elbc_fcm_ctrl->oob = 0; elbc_fcm_ctrl->use_mdr = 1; fcr = (NAND_CMD_STATUS << FCR_CMD1_SHIFT) | diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c index 02edfba25b0c..205b10b9f9b9 100644 --- a/drivers/mtd/nand/fsmc_nand.c +++ b/drivers/mtd/nand/fsmc_nand.c @@ -31,6 +31,7 @@ #include <linux/io.h> #include <linux/slab.h> #include <linux/mtd/fsmc.h> +#include <linux/amba/bus.h> #include <mtd/mtd-abi.h> static struct nand_ecclayout fsmc_ecc1_layout = { @@ -119,21 +120,36 @@ static struct fsmc_eccplace fsmc_ecc4_sp_place = { } }; -/* - * Default partition tables to be used if the partition information not - * provided through platform data - */ -#define PARTITION(n, off, sz) {.name = n, .offset = off, .size = sz} +#ifdef CONFIG_MTD_PARTITIONS /* + * Default partition tables to be used if the partition information not + * provided through platform data. + * * Default partition layout for small page(= 512 bytes) devices * Size for "Root file system" is updated in driver based on actual device size */ static struct mtd_partition partition_info_16KB_blk[] = { - PARTITION("X-loader", 0, 4 * 0x4000), - PARTITION("U-Boot", 0x10000, 20 * 0x4000), - PARTITION("Kernel", 0x60000, 256 * 0x4000), - PARTITION("Root File System", 0x460000, 0), + { + .name = "X-loader", + .offset = 0, + .size = 4*0x4000, + }, + { + .name = "U-Boot", + .offset = 0x10000, + .size = 20*0x4000, + }, + { + .name = "Kernel", + .offset = 0x60000, + .size = 256*0x4000, + }, + { + .name = "Root File System", + .offset = 0x460000, + .size = 0, + }, }; /* @@ -141,19 +157,37 @@ static struct mtd_partition partition_info_16KB_blk[] = { * Size for "Root file system" is updated in driver based on actual device size */ static struct mtd_partition partition_info_128KB_blk[] = { - PARTITION("X-loader", 0, 4 * 0x20000), - PARTITION("U-Boot", 0x80000, 12 * 0x20000), - PARTITION("Kernel", 0x200000, 48 * 0x20000), - PARTITION("Root File System", 0x800000, 0), + { + .name = "X-loader", + .offset = 0, + .size = 4*0x20000, + }, + { + .name = "U-Boot", + .offset = 0x80000, + .size = 12*0x20000, + }, + { + .name = "Kernel", + .offset = 0x200000, + .size = 48*0x20000, + }, + { + .name = "Root File System", + .offset = 0x800000, + .size = 0, + }, }; #ifdef CONFIG_MTD_CMDLINE_PARTS const char *part_probes[] = { "cmdlinepart", NULL }; #endif +#endif /** - * struct fsmc_nand_data - atructure for FSMC NAND device state + * struct fsmc_nand_data - structure for FSMC NAND device state * + * @pid: Part ID on the AMBA PrimeCell format * @mtd: MTD info for a NAND flash. * @nand: Chip related info for a NAND flash. * @partitions: Partition info for a NAND Flash. @@ -169,6 +203,7 @@ const char *part_probes[] = { "cmdlinepart", NULL }; * @regs_va: FSMC regs base address. */ struct fsmc_nand_data { + u32 pid; struct mtd_info mtd; struct nand_chip nand; struct mtd_partition *partitions; @@ -508,7 +543,9 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) struct nand_chip *nand; struct fsmc_regs *regs; struct resource *res; - int nr_parts, ret = 0; + int ret = 0; + u32 pid; + int i; if (!pdata) { dev_err(&pdev->dev, "platform data is NULL\n"); @@ -598,6 +635,18 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) if (ret) goto err_probe1; + /* + * This device ID is actually a common AMBA ID as used on the + * AMBA PrimeCell bus. However it is not a PrimeCell. + */ + for (pid = 0, i = 0; i < 4; i++) + pid |= (readl(host->regs_va + resource_size(res) - 0x20 + 4 * i) & 255) << (i * 8); + host->pid = pid; + dev_info(&pdev->dev, "FSMC device partno %03x, manufacturer %02x, " + "revision %02x, config %02x\n", + AMBA_PART_BITS(pid), AMBA_MANF_BITS(pid), + AMBA_REV_BITS(pid), AMBA_CONFIG_BITS(pid)); + host->bank = pdata->bank; host->select_chip = pdata->select_bank; regs = host->regs_va; @@ -625,7 +674,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) fsmc_nand_setup(regs, host->bank, nand->options & NAND_BUSWIDTH_16); - if (get_fsmc_version(host->regs_va) == FSMC_VER8) { + if (AMBA_REV_BITS(host->pid) >= 8) { nand->ecc.read_page = fsmc_read_page_hwecc; nand->ecc.calculate = fsmc_read_hwecc_ecc4; nand->ecc.correct = fsmc_correct_data; @@ -645,7 +694,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) goto err_probe; } - if (get_fsmc_version(host->regs_va) == FSMC_VER8) { + if (AMBA_REV_BITS(host->pid) >= 8) { if (host->mtd.writesize == 512) { nand->ecc.layout = &fsmc_ecc4_sp_layout; host->ecc_place = &fsmc_ecc4_sp_place; @@ -676,11 +725,9 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) * Check if partition info passed via command line */ host->mtd.name = "nand"; - nr_parts = parse_mtd_partitions(&host->mtd, part_probes, + host->nr_partitions = parse_mtd_partitions(&host->mtd, part_probes, &host->partitions, 0); - if (nr_parts > 0) { - host->nr_partitions = nr_parts; - } else { + if (host->nr_partitions <= 0) { #endif /* * Check if partition info passed via command line diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c index 67343fc31bd5..cea38a5d4ac5 100644 --- a/drivers/mtd/nand/jz4740_nand.c +++ b/drivers/mtd/nand/jz4740_nand.c @@ -251,58 +251,6 @@ static int jz_nand_correct_ecc_rs(struct mtd_info *mtd, uint8_t *dat, return 0; } - -/* Copy paste of nand_read_page_hwecc_oob_first except for different eccpos - * handling. The ecc area is for 4k chips 72 bytes long and thus does not fit - * into the eccpos array. */ -static int jz_nand_read_page_hwecc_oob_first(struct mtd_info *mtd, - struct nand_chip *chip, uint8_t *buf, int page) -{ - int i, eccsize = chip->ecc.size; - int eccbytes = chip->ecc.bytes; - int eccsteps = chip->ecc.steps; - uint8_t *p = buf; - unsigned int ecc_offset = chip->page_shift; - - /* Read the OOB area first */ - chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page); - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); - - for (i = ecc_offset; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { - int stat; - - chip->ecc.hwctl(mtd, NAND_ECC_READ); - chip->read_buf(mtd, p, eccsize); - - stat = chip->ecc.correct(mtd, p, &chip->oob_poi[i], NULL); - if (stat < 0) - mtd->ecc_stats.failed++; - else - mtd->ecc_stats.corrected += stat; - } - return 0; -} - -/* Copy-and-paste of nand_write_page_hwecc with different eccpos handling. */ -static void jz_nand_write_page_hwecc(struct mtd_info *mtd, - struct nand_chip *chip, const uint8_t *buf) -{ - int i, eccsize = chip->ecc.size; - int eccbytes = chip->ecc.bytes; - int eccsteps = chip->ecc.steps; - const uint8_t *p = buf; - unsigned int ecc_offset = chip->page_shift; - - for (i = ecc_offset; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { - chip->ecc.hwctl(mtd, NAND_ECC_WRITE); - chip->write_buf(mtd, p, eccsize); - chip->ecc.calculate(mtd, p, &chip->oob_poi[i]); - } - - chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); -} - #ifdef CONFIG_MTD_CMDLINE_PARTS static const char *part_probes[] = {"cmdline", NULL}; #endif @@ -393,9 +341,6 @@ static int __devinit jz_nand_probe(struct platform_device *pdev) chip->ecc.size = 512; chip->ecc.bytes = 9; - chip->ecc.read_page = jz_nand_read_page_hwecc_oob_first; - chip->ecc.write_page = jz_nand_write_page_hwecc; - if (pdata) chip->ecc.layout = pdata->ecc_layout; @@ -489,7 +434,7 @@ static int __devexit jz_nand_remove(struct platform_device *pdev) return 0; } -struct platform_driver jz_nand_driver = { +static struct platform_driver jz_nand_driver = { .probe = jz_nand_probe, .remove = __devexit_p(jz_nand_remove), .driver = { diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index 214b03afdd48..ef932ba55a0b 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -1009,7 +1009,7 @@ static int __init mxcnd_probe(struct platform_device *pdev) struct mxc_nand_platform_data *pdata = pdev->dev.platform_data; struct mxc_nand_host *host; struct resource *res; - int err = 0, nr_parts = 0; + int err = 0, __maybe_unused nr_parts = 0; struct nand_ecclayout *oob_smallpage, *oob_largepage; /* Allocate memory for MTD device structure and private data */ diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 31bf376b82a0..a9c6ce745767 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2865,20 +2865,24 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip, /* check version */ val = le16_to_cpu(p->revision); - if (val == 1 || val > (1 << 4)) { - printk(KERN_INFO "%s: unsupported ONFI version: %d\n", - __func__, val); - return 0; - } - - if (val & (1 << 4)) + if (val & (1 << 5)) + chip->onfi_version = 23; + else if (val & (1 << 4)) chip->onfi_version = 22; else if (val & (1 << 3)) chip->onfi_version = 21; else if (val & (1 << 2)) chip->onfi_version = 20; - else + else if (val & (1 << 1)) chip->onfi_version = 10; + else + chip->onfi_version = 0; + + if (!chip->onfi_version) { + printk(KERN_INFO "%s: unsupported ONFI version: %d\n", + __func__, val); + return 0; + } sanitize_string(p->manufacturer, sizeof(p->manufacturer)); sanitize_string(p->model, sizeof(p->model)); @@ -2887,7 +2891,7 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip, mtd->writesize = le32_to_cpu(p->byte_per_page); mtd->erasesize = le32_to_cpu(p->pages_per_block) * mtd->writesize; mtd->oobsize = le16_to_cpu(p->spare_bytes_per_page); - chip->chipsize = le32_to_cpu(p->blocks_per_lun) * mtd->erasesize; + chip->chipsize = (uint64_t)le32_to_cpu(p->blocks_per_lun) * mtd->erasesize; busw = 0; if (le16_to_cpu(p->features) & 1) busw = NAND_BUSWIDTH_16; @@ -3157,7 +3161,7 @@ ident_done: printk(KERN_INFO "NAND device: Manufacturer ID:" " 0x%02x, Chip ID: 0x%02x (%s %s)\n", *maf_id, *dev_id, nand_manuf_ids[maf_idx].name, - chip->onfi_version ? type->name : chip->onfi_params.model); + chip->onfi_version ? chip->onfi_params.model : type->name); return type; } @@ -3435,6 +3439,7 @@ int nand_scan_tail(struct mtd_info *mtd) mtd->resume = nand_resume; mtd->block_isbad = nand_block_isbad; mtd->block_markbad = nand_block_markbad; + mtd->writebufsize = mtd->writesize; /* propagate ecc.layout to mtd_info */ mtd->ecclayout = chip->ecc.layout; diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index 586b981f0e61..6ebd869993aa 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -1092,7 +1092,8 @@ static void mark_bbt_region(struct mtd_info *mtd, struct nand_bbt_descr *td) /** * verify_bbt_descr - verify the bad block description - * @bd: the table to verify + * @mtd: MTD device structure + * @bd: the table to verify * * This functions performs a few sanity checks on the bad block description * table. diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index a6a73aab1253..a5aa99f014ba 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -210,12 +210,12 @@ MODULE_PARM_DESC(bbt, "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in d #define STATE_CMD_READ0 0x00000001 /* read data from the beginning of page */ #define STATE_CMD_READ1 0x00000002 /* read data from the second half of page */ #define STATE_CMD_READSTART 0x00000003 /* read data second command (large page devices) */ -#define STATE_CMD_PAGEPROG 0x00000004 /* start page programm */ +#define STATE_CMD_PAGEPROG 0x00000004 /* start page program */ #define STATE_CMD_READOOB 0x00000005 /* read OOB area */ #define STATE_CMD_ERASE1 0x00000006 /* sector erase first command */ #define STATE_CMD_STATUS 0x00000007 /* read status */ #define STATE_CMD_STATUS_M 0x00000008 /* read multi-plane status (isn't implemented) */ -#define STATE_CMD_SEQIN 0x00000009 /* sequential data imput */ +#define STATE_CMD_SEQIN 0x00000009 /* sequential data input */ #define STATE_CMD_READID 0x0000000A /* read ID */ #define STATE_CMD_ERASE2 0x0000000B /* sector erase second command */ #define STATE_CMD_RESET 0x0000000C /* reset */ @@ -230,7 +230,7 @@ MODULE_PARM_DESC(bbt, "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in d #define STATE_ADDR_ZERO 0x00000040 /* one byte zero address was accepted */ #define STATE_ADDR_MASK 0x00000070 /* address states mask */ -/* Durind data input/output the simulator is in these states */ +/* During data input/output the simulator is in these states */ #define STATE_DATAIN 0x00000100 /* waiting for data input */ #define STATE_DATAIN_MASK 0x00000100 /* data input states mask */ @@ -248,7 +248,7 @@ MODULE_PARM_DESC(bbt, "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in d /* Simulator's actions bit masks */ #define ACTION_CPY 0x00100000 /* copy page/OOB to the internal buffer */ -#define ACTION_PRGPAGE 0x00200000 /* programm the internal buffer to flash */ +#define ACTION_PRGPAGE 0x00200000 /* program the internal buffer to flash */ #define ACTION_SECERASE 0x00300000 /* erase sector */ #define ACTION_ZEROOFF 0x00400000 /* don't add any offset to address */ #define ACTION_HALFOFF 0x00500000 /* add to address half of page */ @@ -263,18 +263,18 @@ MODULE_PARM_DESC(bbt, "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in d #define OPT_PAGE512 0x00000002 /* 512-byte page chips */ #define OPT_PAGE2048 0x00000008 /* 2048-byte page chips */ #define OPT_SMARTMEDIA 0x00000010 /* SmartMedia technology chips */ -#define OPT_AUTOINCR 0x00000020 /* page number auto inctimentation is possible */ +#define OPT_AUTOINCR 0x00000020 /* page number auto incrementation is possible */ #define OPT_PAGE512_8BIT 0x00000040 /* 512-byte page chips with 8-bit bus width */ #define OPT_PAGE4096 0x00000080 /* 4096-byte page chips */ #define OPT_LARGEPAGE (OPT_PAGE2048 | OPT_PAGE4096) /* 2048 & 4096-byte page chips */ #define OPT_SMALLPAGE (OPT_PAGE256 | OPT_PAGE512) /* 256 and 512-byte page chips */ -/* Remove action bits ftom state */ +/* Remove action bits from state */ #define NS_STATE(x) ((x) & ~ACTION_MASK) /* * Maximum previous states which need to be saved. Currently saving is - * only needed for page programm operation with preceeded read command + * only needed for page program operation with preceded read command * (which is only valid for 512-byte pages). */ #define NS_MAX_PREVSTATES 1 @@ -380,16 +380,16 @@ static struct nandsim_operations { /* Read OOB */ {OPT_SMALLPAGE, {STATE_CMD_READOOB | ACTION_OOBOFF, STATE_ADDR_PAGE | ACTION_CPY, STATE_DATAOUT, STATE_READY}}, - /* Programm page starting from the beginning */ + /* Program page starting from the beginning */ {OPT_ANY, {STATE_CMD_SEQIN, STATE_ADDR_PAGE, STATE_DATAIN, STATE_CMD_PAGEPROG | ACTION_PRGPAGE, STATE_READY}}, - /* Programm page starting from the beginning */ + /* Program page starting from the beginning */ {OPT_SMALLPAGE, {STATE_CMD_READ0, STATE_CMD_SEQIN | ACTION_ZEROOFF, STATE_ADDR_PAGE, STATE_DATAIN, STATE_CMD_PAGEPROG | ACTION_PRGPAGE, STATE_READY}}, - /* Programm page starting from the second half */ + /* Program page starting from the second half */ {OPT_PAGE512, {STATE_CMD_READ1, STATE_CMD_SEQIN | ACTION_HALFOFF, STATE_ADDR_PAGE, STATE_DATAIN, STATE_CMD_PAGEPROG | ACTION_PRGPAGE, STATE_READY}}, - /* Programm OOB */ + /* Program OOB */ {OPT_SMALLPAGE, {STATE_CMD_READOOB, STATE_CMD_SEQIN | ACTION_OOBOFF, STATE_ADDR_PAGE, STATE_DATAIN, STATE_CMD_PAGEPROG | ACTION_PRGPAGE, STATE_READY}}, /* Erase sector */ @@ -470,7 +470,7 @@ static int alloc_device(struct nandsim *ns) err = -EINVAL; goto err_close; } - ns->pages_written = vmalloc(ns->geom.pgnum); + ns->pages_written = vzalloc(ns->geom.pgnum); if (!ns->pages_written) { NS_ERR("alloc_device: unable to allocate pages written array\n"); err = -ENOMEM; @@ -483,7 +483,6 @@ static int alloc_device(struct nandsim *ns) goto err_free; } ns->cfile = cfile; - memset(ns->pages_written, 0, ns->geom.pgnum); return 0; } @@ -1171,9 +1170,9 @@ static inline void switch_to_ready_state(struct nandsim *ns, u_char status) * of supported operations. * * Operation can be unknown because of the following. - * 1. New command was accepted and this is the firs call to find the + * 1. New command was accepted and this is the first call to find the * correspondent states chain. In this case ns->npstates = 0; - * 2. There is several operations which begin with the same command(s) + * 2. There are several operations which begin with the same command(s) * (for example program from the second half and read from the * second half operations both begin with the READ1 command). In this * case the ns->pstates[] array contains previous states. @@ -1186,7 +1185,7 @@ static inline void switch_to_ready_state(struct nandsim *ns, u_char status) * ns->ops, ns->state, ns->nxstate are initialized, ns->npstate is * zeroed). * - * If there are several maches, the current state is pushed to the + * If there are several matches, the current state is pushed to the * ns->pstates. * * The operation can be unknown only while commands are input to the chip. @@ -1195,10 +1194,10 @@ static inline void switch_to_ready_state(struct nandsim *ns, u_char status) * operation is searched using the following pattern: * ns->pstates[0], ... ns->pstates[ns->npstates], <address input> * - * It is supposed that this pattern must either match one operation on + * It is supposed that this pattern must either match one operation or * none. There can't be ambiguity in that case. * - * If no matches found, the functions does the following: + * If no matches found, the function does the following: * 1. if there are saved states present, try to ignore them and search * again only using the last command. If nothing was found, switch * to the STATE_READY state. @@ -1668,7 +1667,7 @@ static int do_state_action(struct nandsim *ns, uint32_t action) case ACTION_PRGPAGE: /* - * Programm page - move internal buffer data to the page. + * Program page - move internal buffer data to the page. */ if (ns->lines.wp) { @@ -1933,7 +1932,7 @@ static u_char ns_nand_read_byte(struct mtd_info *mtd) NS_DBG("read_byte: all bytes were read\n"); /* - * The OPT_AUTOINCR allows to read next conseqitive pages without + * The OPT_AUTOINCR allows to read next consecutive pages without * new read operation cycle. */ if ((ns->options & OPT_AUTOINCR) && NS_STATE(ns->state) == STATE_DATAOUT) { diff --git a/drivers/mtd/nand/pasemi_nand.c b/drivers/mtd/nand/pasemi_nand.c index 6ddb2461d740..bb277a54986f 100644 --- a/drivers/mtd/nand/pasemi_nand.c +++ b/drivers/mtd/nand/pasemi_nand.c @@ -107,7 +107,7 @@ static int __devinit pasemi_nand_probe(struct platform_device *ofdev, if (pasemi_nand_mtd) return -ENODEV; - pr_debug("pasemi_nand at %llx-%llx\n", res.start, res.end); + pr_debug("pasemi_nand at %pR\n", &res); /* Allocate memory for MTD device structure and private data */ pasemi_nand_mtd = kzalloc(sizeof(struct mtd_info) + diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 17f8518cc5eb..ea2c288df3f6 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -885,6 +885,7 @@ static int pxa3xx_nand_detect_config(struct pxa3xx_nand_info *info) /* set info fields needed to __readid */ info->read_id_bytes = (info->page_size == 2048) ? 4 : 2; info->reg_ndcr = ndcr; + info->cmdset = &default_cmdset; if (__readid(info, &id)) return -ENODEV; @@ -915,7 +916,6 @@ static int pxa3xx_nand_detect_config(struct pxa3xx_nand_info *info) info->ndtr0cs0 = nand_readl(info, NDTR0CS0); info->ndtr1cs0 = nand_readl(info, NDTR1CS0); - info->cmdset = &default_cmdset; return 0; } diff --git a/drivers/mtd/nand/txx9ndfmc.c b/drivers/mtd/nand/txx9ndfmc.c index 054a41c0ef4a..ca270a4881a4 100644 --- a/drivers/mtd/nand/txx9ndfmc.c +++ b/drivers/mtd/nand/txx9ndfmc.c @@ -277,8 +277,9 @@ static int txx9ndfmc_nand_scan(struct mtd_info *mtd) ret = nand_scan_ident(mtd, 1, NULL); if (!ret) { if (mtd->writesize >= 512) { - chip->ecc.size = mtd->writesize; - chip->ecc.bytes = 3 * (mtd->writesize / 256); + /* Hardware ECC 6 byte ECC per 512 Byte data */ + chip->ecc.size = 512; + chip->ecc.bytes = 6; } ret = nand_scan_tail(mtd); } diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index d0894ca7798b..ac31f461cc1c 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -35,6 +35,7 @@ #include <linux/dma-mapping.h> #include <linux/io.h> #include <linux/slab.h> +#include <linux/regulator/consumer.h> #include <asm/mach/flash.h> #include <plat/gpmc.h> @@ -63,8 +64,13 @@ struct omap2_onenand { int dma_channel; int freq; int (*setup)(void __iomem *base, int freq); + struct regulator *regulator; }; +#ifdef CONFIG_MTD_PARTITIONS +static const char *part_probes[] = { "cmdlinepart", NULL, }; +#endif + static void omap2_onenand_dma_cb(int lch, u16 ch_status, void *data) { struct omap2_onenand *c = data; @@ -108,8 +114,9 @@ static void wait_warn(char *msg, int state, unsigned int ctrl, static int omap2_onenand_wait(struct mtd_info *mtd, int state) { struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); + struct onenand_chip *this = mtd->priv; unsigned int intr = 0; - unsigned int ctrl; + unsigned int ctrl, ctrl_mask; unsigned long timeout; u32 syscfg; @@ -180,7 +187,8 @@ retry: if (result == 0) { /* Timeout after 20ms */ ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS); - if (ctrl & ONENAND_CTRL_ONGO) { + if (ctrl & ONENAND_CTRL_ONGO && + !this->ongoing) { /* * The operation seems to be still going * so give it some more time. @@ -269,7 +277,11 @@ retry: return -EIO; } - if (ctrl & 0xFE9F) + ctrl_mask = 0xFE9F; + if (this->ongoing) + ctrl_mask &= ~0x8000; + + if (ctrl & ctrl_mask) wait_warn("unexpected controller status", state, ctrl, intr); return 0; @@ -591,6 +603,30 @@ static void omap2_onenand_shutdown(struct platform_device *pdev) memset((__force void *)c->onenand.base, 0, ONENAND_BUFRAM_SIZE); } +static int omap2_onenand_enable(struct mtd_info *mtd) +{ + int ret; + struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); + + ret = regulator_enable(c->regulator); + if (ret != 0) + dev_err(&c->pdev->dev, "cant enable regulator\n"); + + return ret; +} + +static int omap2_onenand_disable(struct mtd_info *mtd) +{ + int ret; + struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); + + ret = regulator_disable(c->regulator); + if (ret != 0) + dev_err(&c->pdev->dev, "cant disable regulator\n"); + + return ret; +} + static int __devinit omap2_onenand_probe(struct platform_device *pdev) { struct omap_onenand_platform_data *pdata; @@ -705,8 +741,18 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev) } } + if (pdata->regulator_can_sleep) { + c->regulator = regulator_get(&pdev->dev, "vonenand"); + if (IS_ERR(c->regulator)) { + dev_err(&pdev->dev, "Failed to get regulator\n"); + goto err_release_dma; + } + c->onenand.enable = omap2_onenand_enable; + c->onenand.disable = omap2_onenand_disable; + } + if ((r = onenand_scan(&c->mtd, 1)) < 0) - goto err_release_dma; + goto err_release_regulator; switch ((c->onenand.version_id >> 4) & 0xf) { case 0: @@ -727,13 +773,15 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev) } #ifdef CONFIG_MTD_PARTITIONS - if (pdata->parts != NULL) - r = add_mtd_partitions(&c->mtd, pdata->parts, - pdata->nr_parts); + r = parse_mtd_partitions(&c->mtd, part_probes, &c->parts, 0); + if (r > 0) + r = add_mtd_partitions(&c->mtd, c->parts, r); + else if (pdata->parts != NULL) + r = add_mtd_partitions(&c->mtd, pdata->parts, pdata->nr_parts); else #endif r = add_mtd_device(&c->mtd); - if (r < 0) + if (r) goto err_release_onenand; platform_set_drvdata(pdev, c); @@ -742,6 +790,8 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev) err_release_onenand: onenand_release(&c->mtd); +err_release_regulator: + regulator_put(c->regulator); err_release_dma: if (c->dma_channel != -1) omap_free_dma(c->dma_channel); @@ -757,6 +807,7 @@ err_release_mem_region: err_free_cs: gpmc_cs_free(c->gpmc_cs); err_kfree: + kfree(c->parts); kfree(c); return r; @@ -766,18 +817,8 @@ static int __devexit omap2_onenand_remove(struct platform_device *pdev) { struct omap2_onenand *c = dev_get_drvdata(&pdev->dev); - BUG_ON(c == NULL); - -#ifdef CONFIG_MTD_PARTITIONS - if (c->parts) - del_mtd_partitions(&c->mtd); - else - del_mtd_device(&c->mtd); -#else - del_mtd_device(&c->mtd); -#endif - onenand_release(&c->mtd); + regulator_put(c->regulator); if (c->dma_channel != -1) omap_free_dma(c->dma_channel); omap2_onenand_shutdown(pdev); @@ -789,6 +830,7 @@ static int __devexit omap2_onenand_remove(struct platform_device *pdev) iounmap(c->onenand.base); release_mem_region(c->phys_base, ONENAND_IO_SIZE); gpmc_cs_free(c->gpmc_cs); + kfree(c->parts); kfree(c); return 0; diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 6b3a875647c9..bac41caa8df7 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -400,8 +400,7 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le value = onenand_bufferram_address(this, block); this->write_word(value, this->base + ONENAND_REG_START_ADDRESS2); - if (ONENAND_IS_MLC(this) || ONENAND_IS_2PLANE(this) || - ONENAND_IS_4KB_PAGE(this)) + if (ONENAND_IS_2PLANE(this) || ONENAND_IS_4KB_PAGE(this)) /* It is always BufferRAM0 */ ONENAND_SET_BUFFERRAM0(this); else @@ -430,7 +429,7 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le case FLEXONENAND_CMD_RECOVER_LSB: case ONENAND_CMD_READ: case ONENAND_CMD_READOOB: - if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this)) + if (ONENAND_IS_4KB_PAGE(this)) /* It is always BufferRAM0 */ dataram = ONENAND_SET_BUFFERRAM0(this); else @@ -949,6 +948,8 @@ static int onenand_get_device(struct mtd_info *mtd, int new_state) if (this->state == FL_READY) { this->state = new_state; spin_unlock(&this->chip_lock); + if (new_state != FL_PM_SUSPENDED && this->enable) + this->enable(mtd); break; } if (new_state == FL_PM_SUSPENDED) { @@ -975,6 +976,8 @@ static void onenand_release_device(struct mtd_info *mtd) { struct onenand_chip *this = mtd->priv; + if (this->state != FL_PM_SUSPENDED && this->disable) + this->disable(mtd); /* Release the chip */ spin_lock(&this->chip_lock); this->state = FL_READY; @@ -1353,7 +1356,7 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from, stats = mtd->ecc_stats; - readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; + readcmd = ONENAND_IS_4KB_PAGE(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; while (read < len) { cond_resched(); @@ -1429,7 +1432,7 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len, int ret; onenand_get_device(mtd, FL_READING); - ret = ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this) ? + ret = ONENAND_IS_4KB_PAGE(this) ? onenand_mlc_read_ops_nolock(mtd, from, &ops) : onenand_read_ops_nolock(mtd, from, &ops); onenand_release_device(mtd); @@ -1464,7 +1467,7 @@ static int onenand_read_oob(struct mtd_info *mtd, loff_t from, onenand_get_device(mtd, FL_READING); if (ops->datbuf) - ret = ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this) ? + ret = ONENAND_IS_4KB_PAGE(this) ? onenand_mlc_read_ops_nolock(mtd, from, ops) : onenand_read_ops_nolock(mtd, from, ops); else @@ -1485,8 +1488,7 @@ static int onenand_bbt_wait(struct mtd_info *mtd, int state) { struct onenand_chip *this = mtd->priv; unsigned long timeout; - unsigned int interrupt; - unsigned int ctrl; + unsigned int interrupt, ctrl, ecc, addr1, addr8; /* The 20 msec is enough */ timeout = jiffies + msecs_to_jiffies(20); @@ -1498,25 +1500,28 @@ static int onenand_bbt_wait(struct mtd_info *mtd, int state) /* To get correct interrupt status in timeout case */ interrupt = this->read_word(this->base + ONENAND_REG_INTERRUPT); ctrl = this->read_word(this->base + ONENAND_REG_CTRL_STATUS); + addr1 = this->read_word(this->base + ONENAND_REG_START_ADDRESS1); + addr8 = this->read_word(this->base + ONENAND_REG_START_ADDRESS8); if (interrupt & ONENAND_INT_READ) { - int ecc = onenand_read_ecc(this); + ecc = onenand_read_ecc(this); if (ecc & ONENAND_ECC_2BIT_ALL) { - printk(KERN_WARNING "%s: ecc error = 0x%04x, " - "controller error 0x%04x\n", - __func__, ecc, ctrl); + printk(KERN_DEBUG "%s: ecc 0x%04x ctrl 0x%04x " + "intr 0x%04x addr1 %#x addr8 %#x\n", + __func__, ecc, ctrl, interrupt, addr1, addr8); return ONENAND_BBT_READ_ECC_ERROR; } } else { - printk(KERN_ERR "%s: read timeout! ctrl=0x%04x intr=0x%04x\n", - __func__, ctrl, interrupt); + printk(KERN_ERR "%s: read timeout! ctrl 0x%04x " + "intr 0x%04x addr1 %#x addr8 %#x\n", + __func__, ctrl, interrupt, addr1, addr8); return ONENAND_BBT_READ_FATAL_ERROR; } /* Initial bad block case: 0x2400 or 0x0400 */ if (ctrl & ONENAND_CTRL_ERROR) { - printk(KERN_DEBUG "%s: controller error = 0x%04x\n", - __func__, ctrl); + printk(KERN_DEBUG "%s: ctrl 0x%04x intr 0x%04x addr1 %#x " + "addr8 %#x\n", __func__, ctrl, interrupt, addr1, addr8); return ONENAND_BBT_READ_ERROR; } @@ -1558,7 +1563,7 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from, column = from & (mtd->oobsize - 1); - readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; + readcmd = ONENAND_IS_4KB_PAGE(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; while (read < len) { cond_resched(); @@ -1612,7 +1617,7 @@ static int onenand_verify_oob(struct mtd_info *mtd, const u_char *buf, loff_t to u_char *oob_buf = this->oob_buf; int status, i, readcmd; - readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; + readcmd = ONENAND_IS_4KB_PAGE(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; this->command(mtd, readcmd, to, mtd->oobsize); onenand_update_bufferram(mtd, to, 0); @@ -1845,7 +1850,7 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to, const u_char *buf = ops->datbuf; const u_char *oob = ops->oobbuf; u_char *oobbuf; - int ret = 0; + int ret = 0, cmd; DEBUG(MTD_DEBUG_LEVEL3, "%s: to = 0x%08x, len = %i\n", __func__, (unsigned int) to, (int) len); @@ -1954,7 +1959,19 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to, ONENAND_SET_NEXT_BUFFERRAM(this); } - this->command(mtd, ONENAND_CMD_PROG, to, mtd->writesize); + this->ongoing = 0; + cmd = ONENAND_CMD_PROG; + + /* Exclude 1st OTP and OTP blocks for cache program feature */ + if (ONENAND_IS_CACHE_PROGRAM(this) && + likely(onenand_block(this, to) != 0) && + ONENAND_IS_4KB_PAGE(this) && + ((written + thislen) < len)) { + cmd = ONENAND_CMD_2X_CACHE_PROG; + this->ongoing = 1; + } + + this->command(mtd, cmd, to, mtd->writesize); /* * 2 PLANE, MLC, and Flex-OneNAND wait here @@ -2067,7 +2084,7 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to, oobbuf = this->oob_buf; - oobcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_PROG : ONENAND_CMD_PROGOOB; + oobcmd = ONENAND_IS_4KB_PAGE(this) ? ONENAND_CMD_PROG : ONENAND_CMD_PROGOOB; /* Loop until all data write */ while (written < len) { @@ -2086,7 +2103,7 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to, memcpy(oobbuf + column, buf, thislen); this->write_bufferram(mtd, ONENAND_SPARERAM, oobbuf, 0, mtd->oobsize); - if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this)) { + if (ONENAND_IS_4KB_PAGE(this)) { /* Set main area of DataRAM to 0xff*/ memset(this->page_buf, 0xff, mtd->writesize); this->write_bufferram(mtd, ONENAND_DATARAM, @@ -2481,7 +2498,8 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr) /* Grab the lock and see if the device is available */ onenand_get_device(mtd, FL_ERASING); - if (region || instr->len < MB_ERASE_MIN_BLK_COUNT * block_size) { + if (ONENAND_IS_4KB_PAGE(this) || region || + instr->len < MB_ERASE_MIN_BLK_COUNT * block_size) { /* region is set for Flex-OneNAND (no mb erase) */ ret = onenand_block_by_block_erase(mtd, instr, region, block_size); @@ -3029,7 +3047,7 @@ static int do_otp_read(struct mtd_info *mtd, loff_t from, size_t len, this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0); this->wait(mtd, FL_OTPING); - ret = ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this) ? + ret = ONENAND_IS_4KB_PAGE(this) ? onenand_mlc_read_ops_nolock(mtd, from, &ops) : onenand_read_ops_nolock(mtd, from, &ops); @@ -3377,8 +3395,10 @@ static void onenand_check_features(struct mtd_info *mtd) case ONENAND_DEVICE_DENSITY_4Gb: if (ONENAND_IS_DDP(this)) this->options |= ONENAND_HAS_2PLANE; - else if (numbufs == 1) + else if (numbufs == 1) { this->options |= ONENAND_HAS_4KB_PAGE; + this->options |= ONENAND_HAS_CACHE_PROGRAM; + } case ONENAND_DEVICE_DENSITY_2Gb: /* 2Gb DDP does not have 2 plane */ @@ -3399,7 +3419,11 @@ static void onenand_check_features(struct mtd_info *mtd) break; } - if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this)) + /* The MLC has 4KiB pagesize. */ + if (ONENAND_IS_MLC(this)) + this->options |= ONENAND_HAS_4KB_PAGE; + + if (ONENAND_IS_4KB_PAGE(this)) this->options &= ~ONENAND_HAS_2PLANE; if (FLEXONENAND(this)) { @@ -3415,6 +3439,8 @@ static void onenand_check_features(struct mtd_info *mtd) printk(KERN_DEBUG "Chip has 2 plane\n"); if (this->options & ONENAND_HAS_4KB_PAGE) printk(KERN_DEBUG "Chip has 4KiB pagesize\n"); + if (this->options & ONENAND_HAS_CACHE_PROGRAM) + printk(KERN_DEBUG "Chip has cache program feature\n"); } /** @@ -3831,7 +3857,7 @@ static int onenand_probe(struct mtd_info *mtd) /* The data buffer size is equal to page size */ mtd->writesize = this->read_word(this->base + ONENAND_REG_DATA_BUFFER_SIZE); /* We use the full BufferRAM */ - if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this)) + if (ONENAND_IS_4KB_PAGE(this)) mtd->writesize <<= 1; mtd->oobsize = mtd->writesize >> 5; @@ -4054,6 +4080,7 @@ int onenand_scan(struct mtd_info *mtd, int maxchips) mtd->block_isbad = onenand_block_isbad; mtd->block_markbad = onenand_block_markbad; mtd->owner = THIS_MODULE; + mtd->writebufsize = mtd->writesize; /* Unlock whole block */ this->unlock_all(mtd); diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c index 01ab5b3c453b..fc2c16a0fd1c 100644 --- a/drivers/mtd/onenand/onenand_bbt.c +++ b/drivers/mtd/onenand/onenand_bbt.c @@ -91,16 +91,18 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr for (j = 0; j < len; j++) { /* No need to read pages fully, * just read required OOB bytes */ - ret = onenand_bbt_read_oob(mtd, from + j * mtd->writesize + bd->offs, &ops); + ret = onenand_bbt_read_oob(mtd, + from + j * this->writesize + bd->offs, &ops); /* If it is a initial bad block, just ignore it */ if (ret == ONENAND_BBT_READ_FATAL_ERROR) return -EIO; - if (ret || check_short_pattern(&buf[j * scanlen], scanlen, mtd->writesize, bd)) { + if (ret || check_short_pattern(&buf[j * scanlen], + scanlen, this->writesize, bd)) { bbm->bbt[i >> 3] |= 0x03 << (i & 0x6); - printk(KERN_WARNING "Bad eraseblock %d at 0x%08x\n", - i >> 1, (unsigned int) from); + printk(KERN_INFO "OneNAND eraseblock %d is an " + "initial bad block\n", i >> 1); mtd->ecc_stats.badblocks++; break; } diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c index 0de7a05e6de0..a4c74a9ba430 100644 --- a/drivers/mtd/onenand/samsung.c +++ b/drivers/mtd/onenand/samsung.c @@ -651,7 +651,7 @@ static int s5pc110_read_bufferram(struct mtd_info *mtd, int area, void __iomem *p; void *buf = (void *) buffer; dma_addr_t dma_src, dma_dst; - int err, page_dma = 0; + int err, ofs, page_dma = 0; struct device *dev = &onenand->pdev->dev; p = this->base + area; @@ -677,10 +677,13 @@ static int s5pc110_read_bufferram(struct mtd_info *mtd, int area, if (!page) goto normal; + /* Page offset */ + ofs = ((size_t) buf & ~PAGE_MASK); page_dma = 1; + /* DMA routine */ dma_src = onenand->phys_base + (p - this->base); - dma_dst = dma_map_page(dev, page, 0, count, DMA_FROM_DEVICE); + dma_dst = dma_map_page(dev, page, ofs, count, DMA_FROM_DEVICE); } else { /* DMA routine */ dma_src = onenand->phys_base + (p - this->base); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 5ebe280225d6..f49e49dc5928 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -672,7 +672,33 @@ static int io_init(struct ubi_device *ubi) ubi->nor_flash = 1; } - ubi->min_io_size = ubi->mtd->writesize; + /* + * Set UBI min. I/O size (@ubi->min_io_size). We use @mtd->writebufsize + * for these purposes, not @mtd->writesize. At the moment this does not + * matter for NAND, because currently @mtd->writebufsize is equivalent to + * @mtd->writesize for all NANDs. However, some CFI NOR flashes may + * have @mtd->writebufsize which is multiple of @mtd->writesize. + * + * The reason we use @mtd->writebufsize for @ubi->min_io_size is that + * UBI and UBIFS recovery algorithms rely on the fact that if there was + * an unclean power cut, then we can find offset of the last corrupted + * node, align the offset to @ubi->min_io_size, read the rest of the + * eraseblock starting from this offset, and check whether there are + * only 0xFF bytes. If yes, then we are probably dealing with a + * corruption caused by a power cut, if not, then this is probably some + * severe corruption. + * + * Thus, we have to use the maximum write unit size of the flash, which + * is @mtd->writebufsize, because @mtd->writesize is the minimum write + * size, not the maximum. + */ + if (ubi->mtd->type == MTD_NANDFLASH) + ubi_assert(ubi->mtd->writebufsize == ubi->mtd->writesize); + else if (ubi->mtd->type == MTD_NORFLASH) + ubi_assert(ubi->mtd->writebufsize % ubi->mtd->writesize == 0); + + ubi->min_io_size = ubi->mtd->writebufsize; + ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; /* diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index fcdb7f65fe0b..0b8141fc5c26 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -425,12 +425,11 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, /* Read both LEB 0 and LEB 1 into memory */ ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { - leb[seb->lnum] = vmalloc(ubi->vtbl_size); + leb[seb->lnum] = vzalloc(ubi->vtbl_size); if (!leb[seb->lnum]) { err = -ENOMEM; goto out_free; } - memset(leb[seb->lnum], 0, ubi->vtbl_size); err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, ubi->vtbl_size); @@ -516,10 +515,9 @@ static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi, int i; struct ubi_vtbl_record *vtbl; - vtbl = vmalloc(ubi->vtbl_size); + vtbl = vzalloc(ubi->vtbl_size); if (!vtbl) return ERR_PTR(-ENOMEM); - memset(vtbl, 0, ubi->vtbl_size); for (i = 0; i < ubi->vtbl_slots; i++) memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE); diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c index 68aaa42d0ced..32f947154c33 100644 --- a/drivers/net/mlx4/catas.c +++ b/drivers/net/mlx4/catas.c @@ -113,7 +113,7 @@ static void catas_reset(struct work_struct *work) void mlx4_start_catas_poll(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - unsigned long addr; + phys_addr_t addr; INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); @@ -124,8 +124,8 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); if (!priv->catas_err.map) { - mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n", - addr); + mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", + (unsigned long long) addr); return; } diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c index f6e0d40cd876..1ff6ca6466ed 100644 --- a/drivers/net/mlx4/en_main.c +++ b/drivers/net/mlx4/en_main.c @@ -202,7 +202,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev) if (mlx4_uar_alloc(dev, &mdev->priv_uar)) goto err_pd; - mdev->uar_map = ioremap(mdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + mdev->uar_map = ioremap((phys_addr_t) mdev->priv_uar.pfn << PAGE_SHIFT, + PAGE_SIZE); if (!mdev->uar_map) goto err_uar; spin_lock_init(&mdev->uar_lock); diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 782f11d8fa71..4ffdc18fcb8a 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -829,7 +829,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) goto err_uar_table_free; } - priv->kar = ioremap(priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!priv->kar) { mlx4_err(dev, "Couldn't map kernel access region, " "aborting.\n"); diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c index c4f88b7ef7b6..79cf42db2ea9 100644 --- a/drivers/net/mlx4/mcg.c +++ b/drivers/net/mlx4/mcg.c @@ -95,7 +95,8 @@ static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox * entry in hash chain and *mgm holds end of hash chain. */ static int find_mgm(struct mlx4_dev *dev, - u8 *gid, struct mlx4_cmd_mailbox *mgm_mailbox, + u8 *gid, enum mlx4_protocol protocol, + struct mlx4_cmd_mailbox *mgm_mailbox, u16 *hash, int *prev, int *index) { struct mlx4_cmd_mailbox *mailbox; @@ -134,7 +135,8 @@ static int find_mgm(struct mlx4_dev *dev, return err; } - if (!memcmp(mgm->gid, gid, 16)) + if (!memcmp(mgm->gid, gid, 16) && + be32_to_cpu(mgm->members_count) >> 30 == protocol) return err; *prev = *index; @@ -146,7 +148,7 @@ static int find_mgm(struct mlx4_dev *dev, } int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - int block_mcast_loopback) + int block_mcast_loopback, enum mlx4_protocol protocol) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; @@ -165,7 +167,7 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], mutex_lock(&priv->mcg_table.mutex); - err = find_mgm(dev, gid, mailbox, &hash, &prev, &index); + err = find_mgm(dev, gid, protocol, mailbox, &hash, &prev, &index); if (err) goto out; @@ -187,7 +189,7 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], memcpy(mgm->gid, gid, 16); } - members_count = be32_to_cpu(mgm->members_count); + members_count = be32_to_cpu(mgm->members_count) & 0xffffff; if (members_count == MLX4_QP_PER_MGM) { mlx4_err(dev, "MGM at index %x is full.\n", index); err = -ENOMEM; @@ -207,7 +209,7 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], else mgm->qp[members_count++] = cpu_to_be32(qp->qpn & MGM_QPN_MASK); - mgm->members_count = cpu_to_be32(members_count); + mgm->members_count = cpu_to_be32(members_count | (u32) protocol << 30); err = mlx4_WRITE_MCG(dev, index, mailbox); if (err) @@ -242,7 +244,8 @@ out: } EXPORT_SYMBOL_GPL(mlx4_multicast_attach); -int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) +int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + enum mlx4_protocol protocol) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; @@ -260,7 +263,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) mutex_lock(&priv->mcg_table.mutex); - err = find_mgm(dev, gid, mailbox, &hash, &prev, &index); + err = find_mgm(dev, gid, protocol, mailbox, &hash, &prev, &index); if (err) goto out; @@ -270,7 +273,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) goto out; } - members_count = be32_to_cpu(mgm->members_count); + members_count = be32_to_cpu(mgm->members_count) & 0xffffff; for (loc = -1, i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) loc = i; @@ -282,7 +285,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) } - mgm->members_count = cpu_to_be32(--members_count); + mgm->members_count = cpu_to_be32(--members_count | (u32) protocol << 30); mgm->qp[loc] = mgm->qp[i - 1]; mgm->qp[i - 1] = 0; diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index c787c3d95c60..af824e7e0367 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -692,12 +692,6 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, return 1; } -static void *__init early_device_tree_alloc(u64 size, u64 align) -{ - unsigned long mem = early_init_dt_alloc_memory_arch(size, align); - return __va(mem); -} - /** * unflatten_device_tree - create tree of device_nodes from flat blob * @@ -709,7 +703,7 @@ static void *__init early_device_tree_alloc(u64 size, u64 align) void __init unflatten_device_tree(void) { __unflatten_device_tree(initial_boot_params, &allnodes, - early_device_tree_alloc); + early_init_dt_alloc_memory_arch); /* Get pointer to OF "/chosen" node for use everywhere */ of_chosen = of_find_node_by_path("/chosen"); diff --git a/drivers/spi/spi_sh_msiof.c b/drivers/spi/spi_sh_msiof.c index d93b66743ba7..56f60c8ea0ab 100644 --- a/drivers/spi/spi_sh_msiof.c +++ b/drivers/spi/spi_sh_msiof.c @@ -635,7 +635,7 @@ static int sh_msiof_spi_remove(struct platform_device *pdev) ret = spi_bitbang_stop(&p->bitbang); if (!ret) { pm_runtime_disable(&pdev->dev); - free_irq(platform_get_irq(pdev, 0), sh_msiof_spi_irq); + free_irq(platform_get_irq(pdev, 0), p); iounmap(p->mapbase); clk_put(p->clk); spi_master_put(p->bitbang.master); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 87a3a9bd5842..f204d33910ec 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -283,7 +283,7 @@ static int smb_compare_dentry(const struct dentry *, unsigned int, const char *, const struct qstr *); static int smb_delete_dentry(const struct dentry *); -static const struct dentry_operations smbfs_dentry_operations = +const struct dentry_operations smbfs_dentry_operations = { .d_revalidate = smb_lookup_validate, .d_hash = smb_hash_dentry, @@ -291,7 +291,7 @@ static const struct dentry_operations smbfs_dentry_operations = .d_delete = smb_delete_dentry, }; -static const struct dentry_operations smbfs_dentry_operations_case = +const struct dentry_operations smbfs_dentry_operations_case = { .d_revalidate = smb_lookup_validate, .d_delete = smb_delete_dentry, diff --git a/fs/Kconfig b/fs/Kconfig index 771f457402d4..9a7921ae4763 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -30,15 +30,6 @@ config FS_MBCACHE source "fs/reiserfs/Kconfig" source "fs/jfs/Kconfig" -config FS_POSIX_ACL -# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4) -# -# NOTE: you can implement Posix ACLs without these helpers (XFS does). -# Never use this symbol for ifdefs. -# - bool - default n - source "fs/xfs/Kconfig" source "fs/gfs2/Kconfig" source "fs/ocfs2/Kconfig" @@ -47,6 +38,14 @@ source "fs/nilfs2/Kconfig" endif # BLOCK +# Posix ACL utility routines +# +# Note: Posix ACLs can be implemented without these helpers. Never use +# this symbol for ifdefs in core code. +# +config FS_POSIX_ACL + def_bool n + config EXPORTFS tristate @@ -87,7 +87,7 @@ static int __init aio_setup(void) aio_wq = create_workqueue("aio"); abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); - BUG_ON(!abe_pool); + BUG_ON(!aio_wq || !abe_pool); pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 7bb3c020e570..ecb9fd3be143 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -4,6 +4,8 @@ config BTRFS_FS select LIBCRC32C select ZLIB_INFLATE select ZLIB_DEFLATE + select LZO_COMPRESS + select LZO_DECOMPRESS help Btrfs is a new filesystem with extents, writable snapshotting, support for multiple devices and many more features. diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a35eb36b32fd..31610ea73aec 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - export.o tree-log.o acl.o free-space-cache.o zlib.o \ + export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6ae2c8cac9d5..15b5ca2a2606 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) size = __btrfs_getxattr(inode, name, value, size); if (size > 0) { acl = posix_acl_from_xattr(value, size); - if (IS_ERR(acl)) + if (IS_ERR(acl)) { + kfree(value); return acl; + } set_cached_acl(inode, type, acl); } kfree(value); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6ad63f17eca0..ccc991c542df 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -157,7 +157,7 @@ struct btrfs_inode { /* * always compress this one file */ - unsigned force_compress:1; + unsigned force_compress:4; struct inode vfs_inode; }; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b50bc4bd5c56..f745287fbf2e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -62,6 +62,9 @@ struct compressed_bio { /* number of bytes on disk */ unsigned long compressed_len; + /* the compression algorithm for this bio */ + int compress_type; + /* number of compressed pages in the array */ unsigned long nr_pages; @@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err) /* ok, we're the last bio for this extent, lets start * the decompression. */ - ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, - cb->start, - cb->orig_bio->bi_io_vec, - cb->orig_bio->bi_vcnt, - cb->compressed_len); + ret = btrfs_decompress_biovec(cb->compress_type, + cb->compressed_pages, + cb->start, + cb->orig_bio->bi_io_vec, + cb->orig_bio->bi_vcnt, + cb->compressed_len); csum_failed: if (ret) cb->errors = 1; @@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cb->len = uncompressed_len; cb->compressed_len = compressed_len; + cb->compress_type = extent_compress_type(bio_flags); cb->orig_bio = bio; nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / @@ -677,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_put(comp_bio); return 0; } + +static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; +static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; +static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; +static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; +static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; + +struct btrfs_compress_op *btrfs_compress_op[] = { + &btrfs_zlib_compress, + &btrfs_lzo_compress, +}; + +int __init btrfs_init_compress(void) +{ + int i; + + for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { + INIT_LIST_HEAD(&comp_idle_workspace[i]); + spin_lock_init(&comp_workspace_lock[i]); + atomic_set(&comp_alloc_workspace[i], 0); + init_waitqueue_head(&comp_workspace_wait[i]); + } + return 0; +} + +/* + * this finds an available workspace or allocates a new one + * ERR_PTR is returned if things go bad. + */ +static struct list_head *find_workspace(int type) +{ + struct list_head *workspace; + int cpus = num_online_cpus(); + int idx = type - 1; + + struct list_head *idle_workspace = &comp_idle_workspace[idx]; + spinlock_t *workspace_lock = &comp_workspace_lock[idx]; + atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; + wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; + int *num_workspace = &comp_num_workspace[idx]; +again: + spin_lock(workspace_lock); + if (!list_empty(idle_workspace)) { + workspace = idle_workspace->next; + list_del(workspace); + (*num_workspace)--; + spin_unlock(workspace_lock); + return workspace; + + } + if (atomic_read(alloc_workspace) > cpus) { + DEFINE_WAIT(wait); + + spin_unlock(workspace_lock); + prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); + if (atomic_read(alloc_workspace) > cpus && !*num_workspace) + schedule(); + finish_wait(workspace_wait, &wait); + goto again; + } + atomic_inc(alloc_workspace); + spin_unlock(workspace_lock); + + workspace = btrfs_compress_op[idx]->alloc_workspace(); + if (IS_ERR(workspace)) { + atomic_dec(alloc_workspace); + wake_up(workspace_wait); + } + return workspace; +} + +/* + * put a workspace struct back on the list or free it if we have enough + * idle ones sitting around + */ +static void free_workspace(int type, struct list_head *workspace) +{ + int idx = type - 1; + struct list_head *idle_workspace = &comp_idle_workspace[idx]; + spinlock_t *workspace_lock = &comp_workspace_lock[idx]; + atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; + wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; + int *num_workspace = &comp_num_workspace[idx]; + + spin_lock(workspace_lock); + if (*num_workspace < num_online_cpus()) { + list_add_tail(workspace, idle_workspace); + (*num_workspace)++; + spin_unlock(workspace_lock); + goto wake; + } + spin_unlock(workspace_lock); + + btrfs_compress_op[idx]->free_workspace(workspace); + atomic_dec(alloc_workspace); +wake: + if (waitqueue_active(workspace_wait)) + wake_up(workspace_wait); +} + +/* + * cleanup function for module exit + */ +static void free_workspaces(void) +{ + struct list_head *workspace; + int i; + + for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { + while (!list_empty(&comp_idle_workspace[i])) { + workspace = comp_idle_workspace[i].next; + list_del(workspace); + btrfs_compress_op[i]->free_workspace(workspace); + atomic_dec(&comp_alloc_workspace[i]); + } + } +} + +/* + * given an address space and start/len, compress the bytes. + * + * pages are allocated to hold the compressed result and stored + * in 'pages' + * + * out_pages is used to return the number of pages allocated. There + * may be pages allocated even if we return an error + * + * total_in is used to return the number of bytes actually read. It + * may be smaller then len if we had to exit early because we + * ran out of room in the pages array or because we cross the + * max_out threshold. + * + * total_out is used to return the total number of compressed bytes + * + * max_out tells us the max number of bytes that we're allowed to + * stuff into pages + */ +int btrfs_compress_pages(int type, struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out) +{ + struct list_head *workspace; + int ret; + + workspace = find_workspace(type); + if (IS_ERR(workspace)) + return -1; + + ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, + start, len, pages, + nr_dest_pages, out_pages, + total_in, total_out, + max_out); + free_workspace(type, workspace); + return ret; +} + +/* + * pages_in is an array of pages with compressed data. + * + * disk_start is the starting logical offset of this array in the file + * + * bvec is a bio_vec of pages from the file that we want to decompress into + * + * vcnt is the count of pages in the biovec + * + * srclen is the number of bytes in pages_in + * + * The basic idea is that we have a bio that was created by readpages. + * The pages in the bio are for the uncompressed data, and they may not + * be contiguous. They all correspond to the range of bytes covered by + * the compressed extent. + */ +int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, + struct bio_vec *bvec, int vcnt, size_t srclen) +{ + struct list_head *workspace; + int ret; + + workspace = find_workspace(type); + if (IS_ERR(workspace)) + return -ENOMEM; + + ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, + disk_start, + bvec, vcnt, srclen); + free_workspace(type, workspace); + return ret; +} + +/* + * a less complex decompression routine. Our compressed data fits in a + * single page, and we want to read a single page out of it. + * start_byte tells us the offset into the compressed data we're interested in + */ +int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, + unsigned long start_byte, size_t srclen, size_t destlen) +{ + struct list_head *workspace; + int ret; + + workspace = find_workspace(type); + if (IS_ERR(workspace)) + return -ENOMEM; + + ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, + dest_page, start_byte, + srclen, destlen); + + free_workspace(type, workspace); + return ret; +} + +void __exit btrfs_exit_compress(void) +{ + free_workspaces(); +} + +/* + * Copy uncompressed data from working buffer to pages. + * + * buf_start is the byte offset we're of the start of our workspace buffer. + * + * total_out is the last byte of the buffer + */ +int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, + unsigned long total_out, u64 disk_start, + struct bio_vec *bvec, int vcnt, + unsigned long *page_index, + unsigned long *pg_offset) +{ + unsigned long buf_offset; + unsigned long current_buf_start; + unsigned long start_byte; + unsigned long working_bytes = total_out - buf_start; + unsigned long bytes; + char *kaddr; + struct page *page_out = bvec[*page_index].bv_page; + + /* + * start byte is the first byte of the page we're currently + * copying into relative to the start of the compressed data. + */ + start_byte = page_offset(page_out) - disk_start; + + /* we haven't yet hit data corresponding to this page */ + if (total_out <= start_byte) + return 1; + + /* + * the start of the data we care about is offset into + * the middle of our working buffer + */ + if (total_out > start_byte && buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes -= buf_offset; + } else { + buf_offset = 0; + } + current_buf_start = buf_start; + + /* copy bytes from the working buffer into the pages */ + while (working_bytes > 0) { + bytes = min(PAGE_CACHE_SIZE - *pg_offset, + PAGE_CACHE_SIZE - buf_offset); + bytes = min(bytes, working_bytes); + kaddr = kmap_atomic(page_out, KM_USER0); + memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page_out); + + *pg_offset += bytes; + buf_offset += bytes; + working_bytes -= bytes; + current_buf_start += bytes; + + /* check if we need to pick another page */ + if (*pg_offset == PAGE_CACHE_SIZE) { + (*page_index)++; + if (*page_index >= vcnt) + return 0; + + page_out = bvec[*page_index].bv_page; + *pg_offset = 0; + start_byte = page_offset(page_out) - disk_start; + + /* + * make sure our new page is covered by this + * working buffer + */ + if (total_out <= start_byte) + return 1; + + /* + * the next page in the biovec might not be adjacent + * to the last page, but it might still be found + * inside this working buffer. bump our offset pointer + */ + if (total_out > start_byte && + current_buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes = total_out - start_byte; + current_buf_start = buf_start + buf_offset; + } + } + } + + return 1; +} diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 421f5b4aa715..51000174b9d7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -19,24 +19,27 @@ #ifndef __BTRFS_COMPRESSION_ #define __BTRFS_COMPRESSION_ -int btrfs_zlib_decompress(unsigned char *data_in, - struct page *dest_page, - unsigned long start_byte, - size_t srclen, size_t destlen); -int btrfs_zlib_compress_pages(struct address_space *mapping, - u64 start, unsigned long len, - struct page **pages, - unsigned long nr_dest_pages, - unsigned long *out_pages, - unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out); -int btrfs_zlib_decompress_biovec(struct page **pages_in, - u64 disk_start, - struct bio_vec *bvec, - int vcnt, - size_t srclen); -void btrfs_zlib_exit(void); +int btrfs_init_compress(void); +void btrfs_exit_compress(void); + +int btrfs_compress_pages(int type, struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out); +int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, + struct bio_vec *bvec, int vcnt, size_t srclen); +int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, + unsigned long start_byte, size_t srclen, size_t destlen); +int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, + unsigned long total_out, u64 disk_start, + struct bio_vec *bvec, int vcnt, + unsigned long *page_index, + unsigned long *pg_offset); + int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long len, u64 disk_start, unsigned long compressed_len, @@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long nr_pages); int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); + +struct btrfs_compress_op { + struct list_head *(*alloc_workspace)(void); + + void (*free_workspace)(struct list_head *workspace); + + int (*compress_pages)(struct list_head *workspace, + struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out); + + int (*decompress_biovec)(struct list_head *workspace, + struct page **pages_in, + u64 disk_start, + struct bio_vec *bvec, + int vcnt, + size_t srclen); + + int (*decompress)(struct list_head *workspace, + unsigned char *data_in, + struct page *dest_page, + unsigned long start_byte, + size_t srclen, size_t destlen); +}; + +extern struct btrfs_compress_op btrfs_zlib_compress; +extern struct btrfs_compress_op btrfs_lzo_compress; + #endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9ac171599258..b5baff0dccfe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, /* this also releases the path */ void btrfs_free_path(struct btrfs_path *p) { + if (!p) + return; btrfs_release_path(NULL, p); kmem_cache_free(btrfs_path_cachep, p); } @@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_assert_tree_locked(path->nodes[1]); right = read_node_slot(root, upper, slot + 1); + if (right == NULL) + return 1; + btrfs_tree_lock(right); btrfs_set_lock_blocking(right); @@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_assert_tree_locked(path->nodes[1]); left = read_node_slot(root, path->nodes[1], slot - 1); + if (left == NULL) + return 1; + btrfs_tree_lock(left); btrfs_set_lock_blocking(left); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b875d445ea81..2c98b3af6052 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -295,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) #define BTRFS_FSID_SIZE 16 #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) + +/* + * File system states + */ + +/* Errors detected */ +#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) + #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) @@ -399,13 +407,15 @@ struct btrfs_super_block { #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) #define BTRFS_FEATURE_COMPAT_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL #define BTRFS_FEATURE_INCOMPAT_SUPP \ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ - BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) /* * A leaf is full of items. offset and size tell us where to find @@ -552,9 +562,11 @@ struct btrfs_timespec { } __attribute__ ((__packed__)); enum btrfs_compression_type { - BTRFS_COMPRESS_NONE = 0, - BTRFS_COMPRESS_ZLIB = 1, - BTRFS_COMPRESS_LAST = 2, + BTRFS_COMPRESS_NONE = 0, + BTRFS_COMPRESS_ZLIB = 1, + BTRFS_COMPRESS_LZO = 2, + BTRFS_COMPRESS_TYPES = 2, + BTRFS_COMPRESS_LAST = 3, }; struct btrfs_inode_item { @@ -598,6 +610,8 @@ struct btrfs_dir_item { u8 type; } __attribute__ ((__packed__)); +#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) + struct btrfs_root_item { struct btrfs_inode_item inode; __le64 generation; @@ -896,7 +910,8 @@ struct btrfs_fs_info { */ u64 last_trans_log_full_commit; u64 open_ioctl_trans; - unsigned long mount_opt; + unsigned long mount_opt:20; + unsigned long compress_type:4; u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; @@ -1051,6 +1066,9 @@ struct btrfs_fs_info { unsigned metadata_ratio; void *bdev_holder; + + /* filesystem state */ + u64 fs_state; }; /* @@ -1894,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, last_snapshot, 64); +static inline bool btrfs_root_readonly(struct btrfs_root *root) +{ + return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; +} + /* struct btrfs_super_block */ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); @@ -2146,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); +u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); int btrfs_check_data_free_space(struct inode *inode, u64 bytes); @@ -2189,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, int btrfs_set_block_group_rw(struct btrfs_root *root, struct btrfs_block_group_cache *cache); void btrfs_put_block_group_cache(struct btrfs_fs_info *info); +u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); +int btrfs_error_unpin_extent_range(struct btrfs_root *root, + u64 start, u64 end); +int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, + u64 num_bytes); + /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); @@ -2542,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* super.c */ int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); +void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, + unsigned int line, int errno); + +#define btrfs_std_error(fs_info, errno) \ +do { \ + if ((errno)) \ + __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ +} while (0) /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 51d2e4de34eb..b531c36455d8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,6 +44,20 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); +static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, + int read_only); +static int btrfs_destroy_ordered_operations(struct btrfs_root *root); +static int btrfs_destroy_ordered_extents(struct btrfs_root *root); +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root); +static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); +static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); +static int btrfs_destroy_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages, + int mark); +static int btrfs_destroy_pinned_extent(struct btrfs_root *root, + struct extent_io_tree *pinned_extents); +static int btrfs_cleanup_transaction(struct btrfs_root *root); /* * end_io_wq structs are used to do processing in task context when an IO is @@ -353,6 +367,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(len == 0); eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + if (eb == NULL) { + WARN_ON(1); + goto out; + } ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); @@ -427,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, WARN_ON(len == 0); eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + if (eb == NULL) { + ret = -EIO; + goto out; + } found_start = btrfs_header_bytenr(eb); if (found_start != start) { @@ -1145,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, } btrfs_free_path(path); if (ret) { + kfree(root); if (ret > 0) ret = -ENOENT; return ERR_PTR(ret); @@ -1713,8 +1736,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info, BTRFS_ROOT_TREE_OBJECTID); bh = btrfs_read_dev_super(fs_devices->latest_bdev); - if (!bh) + if (!bh) { + err = -EINVAL; goto fail_iput; + } memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); memcpy(&fs_info->super_for_commit, &fs_info->super_copy, @@ -1727,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_iput; + /* check FS state, whether FS is broken. */ + fs_info->fs_state |= btrfs_super_flags(disk_super); + + btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); + ret = btrfs_parse_options(tree_root, options); if (ret) { err = ret; @@ -1744,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, } features = btrfs_super_incompat_flags(disk_super); - if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { - features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; - btrfs_set_super_incompat_flags(disk_super, features); - } + features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; + if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + btrfs_set_super_incompat_flags(disk_super, features); features = btrfs_super_compat_ro_flags(disk_super) & ~BTRFS_FEATURE_COMPAT_RO_SUPP; @@ -1957,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_set_opt(fs_info->mount_opt, SSD); } - if (btrfs_super_log_root(disk_super) != 0) { + /* do not make disk changes in broken FS */ + if (btrfs_super_log_root(disk_super) != 0 && + !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { @@ -2442,8 +2474,28 @@ int close_ctree(struct btrfs_root *root) smp_mb(); btrfs_put_block_group_cache(fs_info); + + /* + * Here come 2 situations when btrfs is broken to flip readonly: + * + * 1. when btrfs flips readonly somewhere else before + * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, + * and btrfs will skip to write sb directly to keep + * ERROR state on disk. + * + * 2. when btrfs flips readonly just in btrfs_commit_super, + * and in such case, btrfs cannnot write sb via btrfs_commit_super, + * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, + * btrfs will cleanup all FS resources first and write sb then. + */ if (!(fs_info->sb->s_flags & MS_RDONLY)) { - ret = btrfs_commit_super(root); + ret = btrfs_commit_super(root); + if (ret) + printk(KERN_ERR "btrfs: commit super ret %d\n", ret); + } + + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + ret = btrfs_error_commit_super(root); if (ret) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } @@ -2619,6 +2671,352 @@ out: return 0; } +static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, + int read_only) +{ + if (read_only) + return; + + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + printk(KERN_WARNING "warning: mount fs with errors, " + "running btrfsck is recommended\n"); +} + +int btrfs_error_commit_super(struct btrfs_root *root) +{ + int ret; + + mutex_lock(&root->fs_info->cleaner_mutex); + btrfs_run_delayed_iputs(root); + mutex_unlock(&root->fs_info->cleaner_mutex); + + down_write(&root->fs_info->cleanup_work_sem); + up_write(&root->fs_info->cleanup_work_sem); + + /* cleanup FS via transaction */ + btrfs_cleanup_transaction(root); + + ret = write_ctree_super(NULL, root, 0); + + return ret; +} + +static int btrfs_destroy_ordered_operations(struct btrfs_root *root) +{ + struct btrfs_inode *btrfs_inode; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + mutex_lock(&root->fs_info->ordered_operations_mutex); + spin_lock(&root->fs_info->ordered_extent_lock); + + list_splice_init(&root->fs_info->ordered_operations, &splice); + while (!list_empty(&splice)) { + btrfs_inode = list_entry(splice.next, struct btrfs_inode, + ordered_operations); + + list_del_init(&btrfs_inode->ordered_operations); + + btrfs_invalidate_inodes(btrfs_inode->root); + } + + spin_unlock(&root->fs_info->ordered_extent_lock); + mutex_unlock(&root->fs_info->ordered_operations_mutex); + + return 0; +} + +static int btrfs_destroy_ordered_extents(struct btrfs_root *root) +{ + struct list_head splice; + struct btrfs_ordered_extent *ordered; + struct inode *inode; + + INIT_LIST_HEAD(&splice); + + spin_lock(&root->fs_info->ordered_extent_lock); + + list_splice_init(&root->fs_info->ordered_extents, &splice); + while (!list_empty(&splice)) { + ordered = list_entry(splice.next, struct btrfs_ordered_extent, + root_extent_list); + + list_del_init(&ordered->root_extent_list); + atomic_inc(&ordered->refs); + + /* the inode may be getting freed (in sys_unlink path). */ + inode = igrab(ordered->inode); + + spin_unlock(&root->fs_info->ordered_extent_lock); + if (inode) + iput(inode); + + atomic_set(&ordered->refs, 1); + btrfs_put_ordered_extent(ordered); + + spin_lock(&root->fs_info->ordered_extent_lock); + } + + spin_unlock(&root->fs_info->ordered_extent_lock); + + return 0; +} + +static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_root *root) +{ + struct rb_node *node; + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_node *ref; + int ret = 0; + + delayed_refs = &trans->delayed_refs; + + spin_lock(&delayed_refs->lock); + if (delayed_refs->num_entries == 0) { + printk(KERN_INFO "delayed_refs has NO entry\n"); + return ret; + } + + node = rb_first(&delayed_refs->root); + while (node) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); + node = rb_next(node); + + ref->in_tree = 0; + rb_erase(&ref->rb_node, &delayed_refs->root); + delayed_refs->num_entries--; + + atomic_set(&ref->refs, 1); + if (btrfs_delayed_ref_is_head(ref)) { + struct btrfs_delayed_ref_head *head; + + head = btrfs_delayed_node_to_head(ref); + mutex_lock(&head->mutex); + kfree(head->extent_op); + delayed_refs->num_heads--; + if (list_empty(&head->cluster)) + delayed_refs->num_heads_ready--; + list_del_init(&head->cluster); + mutex_unlock(&head->mutex); + } + + spin_unlock(&delayed_refs->lock); + btrfs_put_delayed_ref(ref); + + cond_resched(); + spin_lock(&delayed_refs->lock); + } + + spin_unlock(&delayed_refs->lock); + + return ret; +} + +static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) +{ + struct btrfs_pending_snapshot *snapshot; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + list_splice_init(&t->pending_snapshots, &splice); + + while (!list_empty(&splice)) { + snapshot = list_entry(splice.next, + struct btrfs_pending_snapshot, + list); + + list_del_init(&snapshot->list); + + kfree(snapshot); + } + + return 0; +} + +static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) +{ + struct btrfs_inode *btrfs_inode; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + list_splice_init(&root->fs_info->delalloc_inodes, &splice); + + spin_lock(&root->fs_info->delalloc_lock); + + while (!list_empty(&splice)) { + btrfs_inode = list_entry(splice.next, struct btrfs_inode, + delalloc_inodes); + + list_del_init(&btrfs_inode->delalloc_inodes); + + btrfs_invalidate_inodes(btrfs_inode->root); + } + + spin_unlock(&root->fs_info->delalloc_lock); + + return 0; +} + +static int btrfs_destroy_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages, + int mark) +{ + int ret; + struct page *page; + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_buffer *eb; + u64 start = 0; + u64 end; + u64 offset; + unsigned long index; + + while (1) { + ret = find_first_extent_bit(dirty_pages, start, &start, &end, + mark); + if (ret) + break; + + clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); + while (start <= end) { + index = start >> PAGE_CACHE_SHIFT; + start = (u64)(index + 1) << PAGE_CACHE_SHIFT; + page = find_get_page(btree_inode->i_mapping, index); + if (!page) + continue; + offset = page_offset(page); + + spin_lock(&dirty_pages->buffer_lock); + eb = radix_tree_lookup( + &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, + offset >> PAGE_CACHE_SHIFT); + spin_unlock(&dirty_pages->buffer_lock); + if (eb) { + ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, + &eb->bflags); + atomic_set(&eb->refs, 1); + } + if (PageWriteback(page)) + end_page_writeback(page); + + lock_page(page); + if (PageDirty(page)) { + clear_page_dirty_for_io(page); + spin_lock_irq(&page->mapping->tree_lock); + radix_tree_tag_clear(&page->mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + spin_unlock_irq(&page->mapping->tree_lock); + } + + page->mapping->a_ops->invalidatepage(page, 0); + unlock_page(page); + } + } + + return ret; +} + +static int btrfs_destroy_pinned_extent(struct btrfs_root *root, + struct extent_io_tree *pinned_extents) +{ + struct extent_io_tree *unpin; + u64 start; + u64 end; + int ret; + + unpin = pinned_extents; + while (1) { + ret = find_first_extent_bit(unpin, 0, &start, &end, + EXTENT_DIRTY); + if (ret) + break; + + /* opt_discard */ + ret = btrfs_error_discard_extent(root, start, end + 1 - start); + + clear_extent_dirty(unpin, start, end, GFP_NOFS); + btrfs_error_unpin_extent_range(root, start, end); + cond_resched(); + } + + return 0; +} + +static int btrfs_cleanup_transaction(struct btrfs_root *root) +{ + struct btrfs_transaction *t; + LIST_HEAD(list); + + WARN_ON(1); + + mutex_lock(&root->fs_info->trans_mutex); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + + list_splice_init(&root->fs_info->trans_list, &list); + while (!list_empty(&list)) { + t = list_entry(list.next, struct btrfs_transaction, list); + if (!t) + break; + + btrfs_destroy_ordered_operations(root); + + btrfs_destroy_ordered_extents(root); + + btrfs_destroy_delayed_refs(t, root); + + btrfs_block_rsv_release(root, + &root->fs_info->trans_block_rsv, + t->dirty_pages.dirty_bytes); + + /* FIXME: cleanup wait for commit */ + t->in_commit = 1; + t->blocked = 1; + if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) + wake_up(&root->fs_info->transaction_blocked_wait); + + t->blocked = 0; + if (waitqueue_active(&root->fs_info->transaction_wait)) + wake_up(&root->fs_info->transaction_wait); + mutex_unlock(&root->fs_info->trans_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + t->commit_done = 1; + if (waitqueue_active(&t->commit_wait)) + wake_up(&t->commit_wait); + mutex_unlock(&root->fs_info->trans_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + + btrfs_destroy_pending_snapshots(t); + + btrfs_destroy_delalloc_inodes(root); + + spin_lock(&root->fs_info->new_trans_lock); + root->fs_info->running_transaction = NULL; + spin_unlock(&root->fs_info->new_trans_lock); + + btrfs_destroy_marked_extents(root, &t->dirty_pages, + EXTENT_DIRTY); + + btrfs_destroy_pinned_extent(root, + root->fs_info->pinned_extents); + + t->use_count = 0; + list_del_init(&t->list); + memset(t, 0, sizeof(*t)); + kmem_cache_free(btrfs_transaction_cachep, t); + } + + mutex_unlock(&root->fs_info->transaction_kthread_mutex); + mutex_unlock(&root->fs_info->trans_mutex); + + return 0; +} + static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 88e825a0bf21..07b20dc2fd95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors); struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); int btrfs_commit_super(struct btrfs_root *root); +int btrfs_error_commit_super(struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 227e5815d838..b55269340cec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3089,7 +3089,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) return btrfs_reduce_alloc_profile(root, flags); } -static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) +u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) { u64 flags; @@ -3161,8 +3161,12 @@ alloc: bytes + 2 * 1024 * 1024, alloc_target, 0); btrfs_end_transaction(trans, root); - if (ret < 0) - return ret; + if (ret < 0) { + if (ret != -ENOSPC) + return ret; + else + goto commit_trans; + } if (!data_sinfo) { btrfs_set_inode_space_info(root, inode); @@ -3173,6 +3177,7 @@ alloc: spin_unlock(&data_sinfo->lock); /* commit the current transaction and try again */ +commit_trans: if (!committed && !root->fs_info->open_ioctl_trans) { committed = 1; trans = btrfs_join_transaction(root, 1); @@ -3721,11 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, return 0; } - WARN_ON(1); - printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", - block_rsv->size, block_rsv->reserved, - block_rsv->freed[0], block_rsv->freed[1]); - return -ENOSPC; } @@ -7970,13 +7970,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + sinfo->bytes_may_use + sinfo->bytes_readonly + - cache->reserved_pinned + num_bytes < sinfo->total_bytes) { + cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { sinfo->bytes_readonly += num_bytes; sinfo->bytes_reserved += cache->reserved_pinned; cache->reserved_pinned = 0; cache->ro = 1; ret = 0; } + spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); return ret; @@ -8012,6 +8013,62 @@ out: return ret; } +/* + * helper to account the unused space of all the readonly block group in the + * list. takes mirrors into account. + */ +static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) +{ + struct btrfs_block_group_cache *block_group; + u64 free_bytes = 0; + int factor; + + list_for_each_entry(block_group, groups_list, list) { + spin_lock(&block_group->lock); + + if (!block_group->ro) { + spin_unlock(&block_group->lock); + continue; + } + + if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | + BTRFS_BLOCK_GROUP_DUP)) + factor = 2; + else + factor = 1; + + free_bytes += (block_group->key.offset - + btrfs_block_group_used(&block_group->item)) * + factor; + + spin_unlock(&block_group->lock); + } + + return free_bytes; +} + +/* + * helper to account the unused space of all the readonly block group in the + * space_info. takes mirrors into account. + */ +u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) +{ + int i; + u64 free_bytes = 0; + + spin_lock(&sinfo->lock); + + for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) + if (!list_empty(&sinfo->block_groups[i])) + free_bytes += __btrfs_get_ro_block_group_free_space( + &sinfo->block_groups[i]); + + spin_unlock(&sinfo->lock); + + return free_bytes; +} + int btrfs_set_block_group_rw(struct btrfs_root *root, struct btrfs_block_group_cache *cache) { @@ -8092,7 +8149,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) mutex_lock(&root->fs_info->chunk_mutex); list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { u64 min_free = btrfs_block_group_used(&block_group->item); - u64 dev_offset, max_avail; + u64 dev_offset; /* * check to make sure we can actually find a chunk with enough @@ -8100,7 +8157,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) */ if (device->total_bytes > device->bytes_used + min_free) { ret = find_free_dev_extent(NULL, device, min_free, - &dev_offset, &max_avail); + &dev_offset, NULL); if (!ret) break; ret = -1; @@ -8584,3 +8641,14 @@ out: btrfs_free_path(path); return ret; } + +int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) +{ + return unpin_extent_range(root, start, end); +} + +int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, + u64 num_bytes) +{ + return btrfs_discard_extent(root, bytenr, num_bytes); +} diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e86b9f36507..2e993cf1766e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, BUG_ON(extent_map_end(em) <= cur); BUG_ON(end < cur); - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { this_bio_flag = EXTENT_BIO_COMPRESSED; + extent_set_compress_type(&this_bio_flag, + em->compress_type); + } iosize = min(extent_map_end(em) - cur, end - cur + 1); cur_end = min(extent_map_end(em) - 1, end); @@ -3072,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, #endif eb = kmem_cache_zalloc(extent_buffer_cache, mask); + if (eb == NULL) + return NULL; eb->start = start; eb->len = len; spin_lock_init(&eb->lock); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4183c8178f01..7083cfafd061 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -20,8 +20,12 @@ #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) -/* flags for bio submission */ +/* + * flags for bio submission. The high bits indicate the compression + * type for this bio + */ #define EXTENT_BIO_COMPRESSED 1 +#define EXTENT_BIO_FLAG_SHIFT 16 /* these are bit numbers for test/set bit */ #define EXTENT_BUFFER_UPTODATE 0 @@ -135,6 +139,17 @@ struct extent_buffer { wait_queue_head_t lock_wq; }; +static inline void extent_set_compress_type(unsigned long *bio_flags, + int compress_type) +{ + *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; +} + +static inline int extent_compress_type(unsigned long bio_flags) +{ + return bio_flags >> EXTENT_BIO_FLAG_SHIFT; +} + struct extent_map_tree; static inline struct extent_state *extent_state_next(struct extent_state *state) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 23cb8da3ff66..b0e1fce12530 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -3,6 +3,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/hardirq.h> +#include "ctree.h" #include "extent_map.h" @@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) return em; em->in_tree = 0; em->flags = 0; + em->compress_type = BTRFS_COMPRESS_NONE; atomic_set(&em->refs, 1); return em; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ab6d74b6e647..28b44dbd1e35 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -26,7 +26,8 @@ struct extent_map { unsigned long flags; struct block_device *bdev; atomic_t refs; - int in_tree; + unsigned int in_tree:1; + unsigned int compress_type:4; }; struct extent_map_tree { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 66836d85763b..c800d58f3013 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -24,6 +24,7 @@ #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mpage.h> +#include <linux/falloc.h> #include <linux/swap.h> #include <linux/writeback.h> #include <linux/statfs.h> @@ -224,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->bdev = em->bdev; split->flags = flags; + split->compress_type = em->compress_type; ret = add_extent_mapping(em_tree, split); BUG_ON(ret); free_extent_map(split); @@ -238,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->len = em->start + em->len - (start + len); split->bdev = em->bdev; split->flags = flags; + split->compress_type = em->compress_type; if (compressed) { split->block_len = em->block_len; @@ -890,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, if (err) goto out; + /* + * If BTRFS flips readonly due to some impossible error + * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), + * although we have opened a file as writable, we have + * to stop this write operation to ensure FS consistency. + */ + if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + err = -EROFS; + goto out; + } + file_update_time(file); BTRFS_I(inode)->sequence++; @@ -1237,6 +1251,117 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } +static long btrfs_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct extent_state *cached_state = NULL; + u64 cur_offset; + u64 last_byte; + u64 alloc_start; + u64 alloc_end; + u64 alloc_hint = 0; + u64 locked_end; + u64 mask = BTRFS_I(inode)->root->sectorsize - 1; + struct extent_map *em; + int ret; + + alloc_start = offset & ~mask; + alloc_end = (offset + len + mask) & ~mask; + + /* We only support the FALLOC_FL_KEEP_SIZE mode */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + /* + * wait for ordered IO before we have any locks. We'll loop again + * below with the locks held. + */ + btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); + + mutex_lock(&inode->i_mutex); + ret = inode_newsize_ok(inode, alloc_end); + if (ret) + goto out; + + if (alloc_start > inode->i_size) { + ret = btrfs_cont_expand(inode, alloc_start); + if (ret) + goto out; + } + + ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); + if (ret) + goto out; + + locked_end = alloc_end - 1; + while (1) { + struct btrfs_ordered_extent *ordered; + + /* the extent lock is ordered inside the running + * transaction + */ + lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, + locked_end, 0, &cached_state, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, + alloc_end - 1); + if (ordered && + ordered->file_offset + ordered->len > alloc_start && + ordered->file_offset < alloc_end) { + btrfs_put_ordered_extent(ordered); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, + alloc_start, locked_end, + &cached_state, GFP_NOFS); + /* + * we can't wait on the range with the transaction + * running or with the extent lock held + */ + btrfs_wait_ordered_range(inode, alloc_start, + alloc_end - alloc_start); + } else { + if (ordered) + btrfs_put_ordered_extent(ordered); + break; + } + } + + cur_offset = alloc_start; + while (1) { + em = btrfs_get_extent(inode, NULL, 0, cur_offset, + alloc_end - cur_offset, 0); + BUG_ON(IS_ERR(em) || !em); + last_byte = min(extent_map_end(em), alloc_end); + last_byte = (last_byte + mask) & ~mask; + if (em->block_start == EXTENT_MAP_HOLE || + (cur_offset >= inode->i_size && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { + ret = btrfs_prealloc_file_range(inode, mode, cur_offset, + last_byte - cur_offset, + 1 << inode->i_blkbits, + offset + len, + &alloc_hint); + if (ret < 0) { + free_extent_map(em); + break; + } + } + free_extent_map(em); + + cur_offset = last_byte; + if (cur_offset >= alloc_end) { + ret = 0; + break; + } + } + unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, + &cached_state, GFP_NOFS); + + btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); +out: + mutex_unlock(&inode->i_mutex); + return ret; +} + const struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -1248,6 +1373,7 @@ const struct file_operations btrfs_file_operations = { .open = generic_file_open, .release = btrfs_release_file, .fsync = btrfs_sync_file, + .fallocate = btrfs_fallocate, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_ioctl, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a3798a3aa0d2..160b55b3e132 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, size_t cur_size = size; size_t datasize; unsigned long offset; - int use_compress = 0; + int compress_type = BTRFS_COMPRESS_NONE; if (compressed_size && compressed_pages) { - use_compress = 1; + compress_type = root->fs_info->compress_type; cur_size = compressed_size; } @@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_ram_bytes(leaf, ei, size); ptr = btrfs_file_extent_inline_start(ei); - if (use_compress) { + if (compress_type != BTRFS_COMPRESS_NONE) { struct page *cpage; int i = 0; while (compressed_size > 0) { @@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, compressed_size -= cur_size; } btrfs_set_file_extent_compression(leaf, ei, - BTRFS_COMPRESS_ZLIB); + compress_type); } else { page = find_get_page(inode->i_mapping, start >> PAGE_CACHE_SHIFT); @@ -263,6 +263,7 @@ struct async_extent { u64 compressed_size; struct page **pages; unsigned long nr_pages; + int compress_type; struct list_head list; }; @@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow, u64 start, u64 ram_size, u64 compressed_size, struct page **pages, - unsigned long nr_pages) + unsigned long nr_pages, + int compress_type) { struct async_extent *async_extent; @@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow, async_extent->compressed_size = compressed_size; async_extent->pages = pages; async_extent->nr_pages = nr_pages; + async_extent->compress_type = compress_type; list_add_tail(&async_extent->list, &cow->extents); return 0; } @@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode, unsigned long max_uncompressed = 128 * 1024; int i; int will_compress; + int compress_type = root->fs_info->compress_type; actual_end = min_t(u64, isize, end + 1); again: @@ -381,12 +385,16 @@ again: WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); - ret = btrfs_zlib_compress_pages(inode->i_mapping, start, - total_compressed, pages, - nr_pages, &nr_pages_ret, - &total_in, - &total_compressed, - max_compressed); + if (BTRFS_I(inode)->force_compress) + compress_type = BTRFS_I(inode)->force_compress; + + ret = btrfs_compress_pages(compress_type, + inode->i_mapping, start, + total_compressed, pages, + nr_pages, &nr_pages_ret, + &total_in, + &total_compressed, + max_compressed); if (!ret) { unsigned long offset = total_compressed & @@ -493,7 +501,8 @@ again: * and will submit them to the elevator. */ add_async_extent(async_cow, start, num_bytes, - total_compressed, pages, nr_pages_ret); + total_compressed, pages, nr_pages_ret, + compress_type); if (start + num_bytes < end) { start += num_bytes; @@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed: __set_page_dirty_nobuffers(locked_page); /* unlocked later on in the async handlers */ } - add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); + add_async_extent(async_cow, start, end - start + 1, + 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; } @@ -640,6 +650,7 @@ retry: em->block_start = ins.objectid; em->block_len = ins.offset; em->bdev = root->fs_info->fs_devices->latest_bdev; + em->compress_type = async_extent->compress_type; set_bit(EXTENT_FLAG_PINNED, &em->flags); set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); @@ -656,11 +667,13 @@ retry: async_extent->ram_size - 1, 0); } - ret = btrfs_add_ordered_extent(inode, async_extent->start, - ins.objectid, - async_extent->ram_size, - ins.offset, - BTRFS_ORDERED_COMPRESSED); + ret = btrfs_add_ordered_extent_compress(inode, + async_extent->start, + ins.objectid, + async_extent->ram_size, + ins.offset, + BTRFS_ORDERED_COMPRESSED, + async_extent->compress_type); BUG_ON(ret); /* @@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; - int compressed = 0; + int compress_type = 0; int ret; bool nolock = false; @@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) trans->block_rsv = &root->fs_info->delalloc_block_rsv; if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) - compressed = 1; + compress_type = ordered_extent->compress_type; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { - BUG_ON(compressed); + BUG_ON(compress_type); ret = btrfs_mark_extent_written(trans, inode, ordered_extent->file_offset, ordered_extent->file_offset + @@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->disk_len, ordered_extent->len, ordered_extent->len, - compressed, 0, 0, + compress_type, 0, 0, BTRFS_FILE_EXTENT_REG); unpin_extent_cache(&BTRFS_I(inode)->extent_tree, ordered_extent->file_offset, @@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { logical = em->block_start; failrec->bio_flags = EXTENT_BIO_COMPRESSED; + extent_set_compress_type(&failrec->bio_flags, + em->compress_type); } failrec->logical = logical; free_extent_map(em); @@ -3671,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; int err; + if (btrfs_root_readonly(root)) + return -EROFS; + err = inode_change_ok(inode, attr); if (err) return err; @@ -4928,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path, size_t max_size; unsigned long inline_size; unsigned long ptr; + int compress_type; WARN_ON(pg_offset != 0); + compress_type = btrfs_file_extent_compression(leaf, item); max_size = btrfs_file_extent_ram_bytes(leaf, item); inline_size = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(leaf, path->slots[0])); @@ -4939,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, read_extent_buffer(leaf, tmp, ptr, inline_size); max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); - ret = btrfs_zlib_decompress(tmp, page, extent_offset, - inline_size, max_size); + ret = btrfs_decompress(compress_type, tmp, page, + extent_offset, inline_size, max_size); if (ret) { char *kaddr = kmap_atomic(page, KM_USER0); unsigned long copy_size = min_t(u64, @@ -4982,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; - int compressed; + int compress_type; again: read_lock(&em_tree->lock); @@ -5041,7 +5062,7 @@ again: found_type = btrfs_file_extent_type(leaf, item); extent_start = found_key.offset; - compressed = btrfs_file_extent_compression(leaf, item); + compress_type = btrfs_file_extent_compression(leaf, item); if (found_type == BTRFS_FILE_EXTENT_REG || found_type == BTRFS_FILE_EXTENT_PREALLOC) { extent_end = extent_start + @@ -5087,8 +5108,9 @@ again: em->block_start = EXTENT_MAP_HOLE; goto insert; } - if (compressed) { + if (compress_type != BTRFS_COMPRESS_NONE) { set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); + em->compress_type = compress_type; em->block_start = bytenr; em->block_len = btrfs_file_extent_disk_num_bytes(leaf, item); @@ -5122,12 +5144,14 @@ again: em->len = (copy_size + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); em->orig_start = EXTENT_MAP_INLINE; - if (compressed) + if (compress_type) { set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); + em->compress_type = compress_type; + } ptr = btrfs_file_extent_inline_start(item) + extent_offset; if (create == 0 && !PageUptodate(page)) { - if (btrfs_file_extent_compression(leaf, item) == - BTRFS_COMPRESS_ZLIB) { + if (btrfs_file_extent_compression(leaf, item) != + BTRFS_COMPRESS_NONE) { ret = uncompress_inline(path, inode, page, pg_offset, extent_offset, item); @@ -6477,7 +6501,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->ordered_data_close = 0; ei->orphan_meta_reserved = 0; ei->dummy_inode = 0; - ei->force_compress = 0; + ei->force_compress = BTRFS_COMPRESS_NONE; inode = &ei->vfs_inode; extent_map_tree_init(&ei->extent_tree, GFP_NOFS); @@ -7098,116 +7122,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, min_size, actual_len, alloc_hint, trans); } -static long btrfs_fallocate(struct inode *inode, int mode, - loff_t offset, loff_t len) -{ - struct extent_state *cached_state = NULL; - u64 cur_offset; - u64 last_byte; - u64 alloc_start; - u64 alloc_end; - u64 alloc_hint = 0; - u64 locked_end; - u64 mask = BTRFS_I(inode)->root->sectorsize - 1; - struct extent_map *em; - int ret; - - alloc_start = offset & ~mask; - alloc_end = (offset + len + mask) & ~mask; - - /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode && (mode != FALLOC_FL_KEEP_SIZE)) - return -EOPNOTSUPP; - - /* - * wait for ordered IO before we have any locks. We'll loop again - * below with the locks held. - */ - btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); - - mutex_lock(&inode->i_mutex); - ret = inode_newsize_ok(inode, alloc_end); - if (ret) - goto out; - - if (alloc_start > inode->i_size) { - ret = btrfs_cont_expand(inode, alloc_start); - if (ret) - goto out; - } - - ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); - if (ret) - goto out; - - locked_end = alloc_end - 1; - while (1) { - struct btrfs_ordered_extent *ordered; - - /* the extent lock is ordered inside the running - * transaction - */ - lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, - locked_end, 0, &cached_state, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, - alloc_end - 1); - if (ordered && - ordered->file_offset + ordered->len > alloc_start && - ordered->file_offset < alloc_end) { - btrfs_put_ordered_extent(ordered); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - alloc_start, locked_end, - &cached_state, GFP_NOFS); - /* - * we can't wait on the range with the transaction - * running or with the extent lock held - */ - btrfs_wait_ordered_range(inode, alloc_start, - alloc_end - alloc_start); - } else { - if (ordered) - btrfs_put_ordered_extent(ordered); - break; - } - } - - cur_offset = alloc_start; - while (1) { - em = btrfs_get_extent(inode, NULL, 0, cur_offset, - alloc_end - cur_offset, 0); - BUG_ON(IS_ERR(em) || !em); - last_byte = min(extent_map_end(em), alloc_end); - last_byte = (last_byte + mask) & ~mask; - if (em->block_start == EXTENT_MAP_HOLE || - (cur_offset >= inode->i_size && - !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { - ret = btrfs_prealloc_file_range(inode, mode, cur_offset, - last_byte - cur_offset, - 1 << inode->i_blkbits, - offset + len, - &alloc_hint); - if (ret < 0) { - free_extent_map(em); - break; - } - } - free_extent_map(em); - - cur_offset = last_byte; - if (cur_offset >= alloc_end) { - ret = 0; - break; - } - } - unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - &cached_state, GFP_NOFS); - - btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); -out: - mutex_unlock(&inode->i_mutex); - return ret; -} - static int btrfs_set_page_dirty(struct page *page) { return __set_page_dirty_nobuffers(page); @@ -7215,6 +7129,10 @@ static int btrfs_set_page_dirty(struct page *page) static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) { + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) + return -EROFS; if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) return -EACCES; return generic_permission(inode, mask, flags, btrfs_check_acl); @@ -7310,7 +7228,6 @@ static const struct inode_operations btrfs_file_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .permission = btrfs_permission, - .fallocate = btrfs_fallocate, .fiemap = btrfs_fiemap, }; static const struct inode_operations btrfs_special_inode_operations = { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f87552a1d7ea..a506a22b522a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) unsigned int flags, oldflags; int ret; + if (btrfs_root_readonly(root)) + return -EROFS; + if (copy_from_user(&flags, arg, sizeof(flags))) return -EFAULT; @@ -360,7 +363,8 @@ fail: } static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, - char *name, int namelen, u64 *async_transid) + char *name, int namelen, u64 *async_transid, + bool readonly) { struct inode *inode; struct dentry *parent; @@ -378,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, btrfs_init_block_rsv(&pending_snapshot->block_rsv); pending_snapshot->dentry = dentry; pending_snapshot->root = root; + pending_snapshot->readonly = readonly; trans = btrfs_start_transaction(root->fs_info->extent_root, 5); if (IS_ERR(trans)) { @@ -509,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) static noinline int btrfs_mksubvol(struct path *parent, char *name, int namelen, struct btrfs_root *snap_src, - u64 *async_transid) + u64 *async_transid, bool readonly) { struct inode *dir = parent->dentry->d_inode; struct dentry *dentry; @@ -541,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent, if (snap_src) { error = create_snapshot(snap_src, dentry, - name, namelen, async_transid); + name, namelen, async_transid, readonly); } else { error = create_subvol(BTRFS_I(dir)->root, dentry, name, namelen, async_transid); @@ -638,9 +643,11 @@ static int btrfs_defrag_file(struct file *file, struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; struct page *page; + struct btrfs_super_block *disk_super; unsigned long last_index; unsigned long ra_pages = root->fs_info->bdi.ra_pages; unsigned long total_read = 0; + u64 features; u64 page_start; u64 page_end; u64 last_len = 0; @@ -648,6 +655,14 @@ static int btrfs_defrag_file(struct file *file, u64 defrag_end = 0; unsigned long i; int ret; + int compress_type = BTRFS_COMPRESS_ZLIB; + + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { + if (range->compress_type > BTRFS_COMPRESS_TYPES) + return -EINVAL; + if (range->compress_type) + compress_type = range->compress_type; + } if (inode->i_size == 0) return 0; @@ -683,7 +698,7 @@ static int btrfs_defrag_file(struct file *file, total_read++; mutex_lock(&inode->i_mutex); if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) - BTRFS_I(inode)->force_compress = 1; + BTRFS_I(inode)->force_compress = compress_type; ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (ret) @@ -781,10 +796,17 @@ loop_unlock: atomic_dec(&root->fs_info->async_submit_draining); mutex_lock(&inode->i_mutex); - BTRFS_I(inode)->force_compress = 0; + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; mutex_unlock(&inode->i_mutex); } + disk_super = &root->fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); + if (range->compress_type == BTRFS_COMPRESS_LZO) { + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + btrfs_set_super_incompat_flags(disk_super, features); + } + return 0; err_reservations: @@ -901,7 +923,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, char *name, unsigned long fd, int subvol, - u64 *transid) + u64 *transid, + bool readonly) { struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct file *src_file; @@ -919,7 +942,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, if (subvol) { ret = btrfs_mksubvol(&file->f_path, name, namelen, - NULL, transid); + NULL, transid, readonly); } else { struct inode *src_inode; src_file = fget(fd); @@ -938,7 +961,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, } ret = btrfs_mksubvol(&file->f_path, name, namelen, BTRFS_I(src_inode)->root, - transid); + transid, readonly); fput(src_file); } out: @@ -946,58 +969,139 @@ out: } static noinline int btrfs_ioctl_snap_create(struct file *file, - void __user *arg, int subvol, - int v2) + void __user *arg, int subvol) { - struct btrfs_ioctl_vol_args *vol_args = NULL; - struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL; - char *name; - u64 fd; + struct btrfs_ioctl_vol_args *vol_args; int ret; - if (v2) { - u64 transid = 0; - u64 *ptr = NULL; + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2)); - if (IS_ERR(vol_args_v2)) - return PTR_ERR(vol_args_v2); + ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, + vol_args->fd, subvol, + NULL, false); - if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { - ret = -EINVAL; - goto out; - } - - name = vol_args_v2->name; - fd = vol_args_v2->fd; - vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; + kfree(vol_args); + return ret; +} - if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC) - ptr = &transid; +static noinline int btrfs_ioctl_snap_create_v2(struct file *file, + void __user *arg, int subvol) +{ + struct btrfs_ioctl_vol_args_v2 *vol_args; + int ret; + u64 transid = 0; + u64 *ptr = NULL; + bool readonly = false; - ret = btrfs_ioctl_snap_create_transid(file, name, fd, - subvol, ptr); + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; - if (ret == 0 && ptr && - copy_to_user(arg + - offsetof(struct btrfs_ioctl_vol_args_v2, - transid), ptr, sizeof(*ptr))) - ret = -EFAULT; - } else { - vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) - return PTR_ERR(vol_args); - name = vol_args->name; - fd = vol_args->fd; - vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - - ret = btrfs_ioctl_snap_create_transid(file, name, fd, - subvol, NULL); + if (vol_args->flags & + ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { + ret = -EOPNOTSUPP; + goto out; } + + if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) + ptr = &transid; + if (vol_args->flags & BTRFS_SUBVOL_RDONLY) + readonly = true; + + ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, + vol_args->fd, subvol, + ptr, readonly); + + if (ret == 0 && ptr && + copy_to_user(arg + + offsetof(struct btrfs_ioctl_vol_args_v2, + transid), ptr, sizeof(*ptr))) + ret = -EFAULT; out: kfree(vol_args); - kfree(vol_args_v2); + return ret; +} +static noinline int btrfs_ioctl_subvol_getflags(struct file *file, + void __user *arg) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = 0; + u64 flags = 0; + + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) + return -EINVAL; + + down_read(&root->fs_info->subvol_sem); + if (btrfs_root_readonly(root)) + flags |= BTRFS_SUBVOL_RDONLY; + up_read(&root->fs_info->subvol_sem); + + if (copy_to_user(arg, &flags, sizeof(flags))) + ret = -EFAULT; + + return ret; +} + +static noinline int btrfs_ioctl_subvol_setflags(struct file *file, + void __user *arg) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + u64 root_flags; + u64 flags; + int ret = 0; + + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) + return -EINVAL; + + if (copy_from_user(&flags, arg, sizeof(flags))) + return -EFAULT; + + if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) + return -EINVAL; + + if (flags & ~BTRFS_SUBVOL_RDONLY) + return -EOPNOTSUPP; + + down_write(&root->fs_info->subvol_sem); + + /* nothing to do */ + if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) + goto out; + + root_flags = btrfs_root_flags(&root->root_item); + if (flags & BTRFS_SUBVOL_RDONLY) + btrfs_set_root_flags(&root->root_item, + root_flags | BTRFS_ROOT_SUBVOL_RDONLY); + else + btrfs_set_root_flags(&root->root_item, + root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out_reset; + } + + ret = btrfs_update_root(trans, root, + &root->root_key, &root->root_item); + + btrfs_commit_transaction(trans, root); +out_reset: + if (ret) + btrfs_set_root_flags(&root->root_item, root_flags); +out: + up_write(&root->fs_info->subvol_sem); return ret; } @@ -1509,6 +1613,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) struct btrfs_ioctl_defrag_range_args *range; int ret; + if (btrfs_root_readonly(root)) + return -EROFS; + ret = mnt_want_write(file->f_path.mnt); if (ret) return ret; @@ -1637,6 +1744,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) return -EINVAL; + if (btrfs_root_readonly(root)) + return -EROFS; + ret = mnt_want_write(file->f_path.mnt); if (ret) return ret; @@ -1958,6 +2068,10 @@ static long btrfs_ioctl_trans_start(struct file *file) if (file->private_data) goto out; + ret = -EROFS; + if (btrfs_root_readonly(root)) + goto out; + ret = mnt_want_write(file->f_path.mnt); if (ret) goto out; @@ -2257,13 +2371,17 @@ long btrfs_ioctl(struct file *file, unsigned int case FS_IOC_GETVERSION: return btrfs_ioctl_getversion(file, argp); case BTRFS_IOC_SNAP_CREATE: - return btrfs_ioctl_snap_create(file, argp, 0, 0); + return btrfs_ioctl_snap_create(file, argp, 0); case BTRFS_IOC_SNAP_CREATE_V2: - return btrfs_ioctl_snap_create(file, argp, 0, 1); + return btrfs_ioctl_snap_create_v2(file, argp, 0); case BTRFS_IOC_SUBVOL_CREATE: - return btrfs_ioctl_snap_create(file, argp, 1, 0); + return btrfs_ioctl_snap_create(file, argp, 1); case BTRFS_IOC_SNAP_DESTROY: return btrfs_ioctl_snap_destroy(file, argp); + case BTRFS_IOC_SUBVOL_GETFLAGS: + return btrfs_ioctl_subvol_getflags(file, argp); + case BTRFS_IOC_SUBVOL_SETFLAGS: + return btrfs_ioctl_subvol_setflags(file, argp); case BTRFS_IOC_DEFAULT_SUBVOL: return btrfs_ioctl_default_subvol(file, argp); case BTRFS_IOC_DEFRAG: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index c344d12c646b..8fb382167b13 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args { }; #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) #define BTRFS_SUBVOL_NAME_MAX 4039 struct btrfs_ioctl_vol_args_v2 { @@ -133,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args { */ __u32 extent_thresh; + /* + * which compression method to use if turning on compression + * for this defrag operation. If unspecified, zlib will + * be used + */ + __u32 compress_type; + /* spare for later */ - __u32 unused[5]; + __u32 unused[4]; }; struct btrfs_ioctl_space_info { @@ -193,4 +201,6 @@ struct btrfs_ioctl_space_args { #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) #endif diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c new file mode 100644 index 000000000000..cc9b450399df --- /dev/null +++ b/fs/btrfs/lzo.c @@ -0,0 +1,420 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/err.h> +#include <linux/sched.h> +#include <linux/pagemap.h> +#include <linux/bio.h> +#include <linux/lzo.h> +#include "compression.h" + +#define LZO_LEN 4 + +struct workspace { + void *mem; + void *buf; /* where compressed data goes */ + void *cbuf; /* where decompressed data goes */ + struct list_head list; +}; + +static void lzo_free_workspace(struct list_head *ws) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + + vfree(workspace->buf); + vfree(workspace->cbuf); + vfree(workspace->mem); + kfree(workspace); +} + +static struct list_head *lzo_alloc_workspace(void) +{ + struct workspace *workspace; + + workspace = kzalloc(sizeof(*workspace), GFP_NOFS); + if (!workspace) + return ERR_PTR(-ENOMEM); + + workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); + workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); + workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); + if (!workspace->mem || !workspace->buf || !workspace->cbuf) + goto fail; + + INIT_LIST_HEAD(&workspace->list); + + return &workspace->list; +fail: + lzo_free_workspace(&workspace->list); + return ERR_PTR(-ENOMEM); +} + +static inline void write_compress_length(char *buf, size_t len) +{ + __le32 dlen; + + dlen = cpu_to_le32(len); + memcpy(buf, &dlen, LZO_LEN); +} + +static inline size_t read_compress_length(char *buf) +{ + __le32 dlen; + + memcpy(&dlen, buf, LZO_LEN); + return le32_to_cpu(dlen); +} + +static int lzo_compress_pages(struct list_head *ws, + struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + int ret = 0; + char *data_in; + char *cpage_out; + int nr_pages = 0; + struct page *in_page = NULL; + struct page *out_page = NULL; + unsigned long bytes_left; + + size_t in_len; + size_t out_len; + char *buf; + unsigned long tot_in = 0; + unsigned long tot_out = 0; + unsigned long pg_bytes_left; + unsigned long out_offset; + unsigned long bytes; + + *out_pages = 0; + *total_out = 0; + *total_in = 0; + + in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + data_in = kmap(in_page); + + /* + * store the size of all chunks of compressed data in + * the first 4 bytes + */ + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -ENOMEM; + goto out; + } + cpage_out = kmap(out_page); + out_offset = LZO_LEN; + tot_out = LZO_LEN; + pages[0] = out_page; + nr_pages = 1; + pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; + + /* compress at most one page of data each time */ + in_len = min(len, PAGE_CACHE_SIZE); + while (tot_in < len) { + ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, + &out_len, workspace->mem); + if (ret != LZO_E_OK) { + printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", + ret); + ret = -1; + goto out; + } + + /* store the size of this chunk of compressed data */ + write_compress_length(cpage_out + out_offset, out_len); + tot_out += LZO_LEN; + out_offset += LZO_LEN; + pg_bytes_left -= LZO_LEN; + + tot_in += in_len; + tot_out += out_len; + + /* copy bytes from the working buffer into the pages */ + buf = workspace->cbuf; + while (out_len) { + bytes = min_t(unsigned long, pg_bytes_left, out_len); + + memcpy(cpage_out + out_offset, buf, bytes); + + out_len -= bytes; + pg_bytes_left -= bytes; + buf += bytes; + out_offset += bytes; + + /* + * we need another page for writing out. + * + * Note if there's less than 4 bytes left, we just + * skip to a new page. + */ + if ((out_len == 0 && pg_bytes_left < LZO_LEN) || + pg_bytes_left == 0) { + if (pg_bytes_left) { + memset(cpage_out + out_offset, 0, + pg_bytes_left); + tot_out += pg_bytes_left; + } + + /* we're done, don't allocate new page */ + if (out_len == 0 && tot_in >= len) + break; + + kunmap(out_page); + if (nr_pages == nr_dest_pages) { + out_page = NULL; + ret = -1; + goto out; + } + + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -ENOMEM; + goto out; + } + cpage_out = kmap(out_page); + pages[nr_pages++] = out_page; + + pg_bytes_left = PAGE_CACHE_SIZE; + out_offset = 0; + } + } + + /* we're making it bigger, give up */ + if (tot_in > 8192 && tot_in < tot_out) + goto out; + + /* we're all done */ + if (tot_in >= len) + break; + + if (tot_out > max_out) + break; + + bytes_left = len - tot_in; + kunmap(in_page); + page_cache_release(in_page); + + start += PAGE_CACHE_SIZE; + in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); + data_in = kmap(in_page); + in_len = min(bytes_left, PAGE_CACHE_SIZE); + } + + if (tot_out > tot_in) + goto out; + + /* store the size of all chunks of compressed data */ + cpage_out = kmap(pages[0]); + write_compress_length(cpage_out, tot_out); + + kunmap(pages[0]); + + ret = 0; + *total_out = tot_out; + *total_in = tot_in; +out: + *out_pages = nr_pages; + if (out_page) + kunmap(out_page); + + if (in_page) { + kunmap(in_page); + page_cache_release(in_page); + } + + return ret; +} + +static int lzo_decompress_biovec(struct list_head *ws, + struct page **pages_in, + u64 disk_start, + struct bio_vec *bvec, + int vcnt, + size_t srclen) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + int ret = 0, ret2; + char *data_in; + unsigned long page_in_index = 0; + unsigned long page_out_index = 0; + unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / + PAGE_CACHE_SIZE; + unsigned long buf_start; + unsigned long buf_offset = 0; + unsigned long bytes; + unsigned long working_bytes; + unsigned long pg_offset; + + size_t in_len; + size_t out_len; + unsigned long in_offset; + unsigned long in_page_bytes_left; + unsigned long tot_in; + unsigned long tot_out; + unsigned long tot_len; + char *buf; + + data_in = kmap(pages_in[0]); + tot_len = read_compress_length(data_in); + + tot_in = LZO_LEN; + in_offset = LZO_LEN; + tot_len = min_t(size_t, srclen, tot_len); + in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; + + tot_out = 0; + pg_offset = 0; + + while (tot_in < tot_len) { + in_len = read_compress_length(data_in + in_offset); + in_page_bytes_left -= LZO_LEN; + in_offset += LZO_LEN; + tot_in += LZO_LEN; + + tot_in += in_len; + working_bytes = in_len; + + /* fast path: avoid using the working buffer */ + if (in_page_bytes_left >= in_len) { + buf = data_in + in_offset; + bytes = in_len; + goto cont; + } + + /* copy bytes from the pages into the working buffer */ + buf = workspace->cbuf; + buf_offset = 0; + while (working_bytes) { + bytes = min(working_bytes, in_page_bytes_left); + + memcpy(buf + buf_offset, data_in + in_offset, bytes); + buf_offset += bytes; +cont: + working_bytes -= bytes; + in_page_bytes_left -= bytes; + in_offset += bytes; + + /* check if we need to pick another page */ + if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN) + || in_page_bytes_left == 0) { + tot_in += in_page_bytes_left; + + if (working_bytes == 0 && tot_in >= tot_len) + break; + + kunmap(pages_in[page_in_index]); + page_in_index++; + if (page_in_index >= total_pages_in) { + ret = -1; + data_in = NULL; + goto done; + } + data_in = kmap(pages_in[page_in_index]); + + in_page_bytes_left = PAGE_CACHE_SIZE; + in_offset = 0; + } + } + + out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); + ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, + &out_len); + if (ret != LZO_E_OK) { + printk(KERN_WARNING "btrfs decompress failed\n"); + ret = -1; + break; + } + + buf_start = tot_out; + tot_out += out_len; + + ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, + tot_out, disk_start, + bvec, vcnt, + &page_out_index, &pg_offset); + if (ret2 == 0) + break; + } +done: + if (data_in) + kunmap(pages_in[page_in_index]); + return ret; +} + +static int lzo_decompress(struct list_head *ws, unsigned char *data_in, + struct page *dest_page, + unsigned long start_byte, + size_t srclen, size_t destlen) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); + size_t in_len; + size_t out_len; + size_t tot_len; + int ret = 0; + char *kaddr; + unsigned long bytes; + + BUG_ON(srclen < LZO_LEN); + + tot_len = read_compress_length(data_in); + data_in += LZO_LEN; + + in_len = read_compress_length(data_in); + data_in += LZO_LEN; + + out_len = PAGE_CACHE_SIZE; + ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); + if (ret != LZO_E_OK) { + printk(KERN_WARNING "btrfs decompress failed!\n"); + ret = -1; + goto out; + } + + if (out_len < start_byte) { + ret = -1; + goto out; + } + + bytes = min_t(unsigned long, destlen, out_len - start_byte); + + kaddr = kmap_atomic(dest_page, KM_USER0); + memcpy(kaddr, workspace->buf + start_byte, bytes); + kunmap_atomic(kaddr, KM_USER0); +out: + return ret; +} + +struct btrfs_compress_op btrfs_lzo_compress = { + .alloc_workspace = lzo_alloc_workspace, + .free_workspace = lzo_free_workspace, + .compress_pages = lzo_compress_pages, + .decompress_biovec = lzo_decompress_biovec, + .decompress = lzo_decompress, +}; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ae7737e352c9..2b61e1ddcd99 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, */ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, - int type, int dio) + int type, int dio, int compress_type) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->disk_len = disk_len; entry->bytes_left = len; entry->inode = inode; + entry->compress_type = compress_type; if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); @@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type) { return __btrfs_add_ordered_extent(inode, file_offset, start, len, - disk_len, type, 0); + disk_len, type, 0, + BTRFS_COMPRESS_NONE); } int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type) { return __btrfs_add_ordered_extent(inode, file_offset, start, len, - disk_len, type, 1); + disk_len, type, 1, + BTRFS_COMPRESS_NONE); +} + +int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, + u64 start, u64 len, u64 disk_len, + int type, int compress_type) +{ + return __btrfs_add_ordered_extent(inode, file_offset, start, len, + disk_len, type, 0, + compress_type); } /* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 61dca83119dd..ff1f69aa1883 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -68,7 +68,7 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ -#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ +#define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ @@ -93,6 +93,9 @@ struct btrfs_ordered_extent { /* flags (described above) */ unsigned long flags; + /* compression algorithm */ + int compress_type; + /* reference count */ atomic_t refs; @@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type); int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type); +int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, + u64 start, u64 len, u64 disk_len, + int type, int compress_type); int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 22acdaa78ce1..b2130c46fdb5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -54,6 +54,90 @@ static const struct super_operations btrfs_super_ops; +static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, + char nbuf[16]) +{ + char *errstr = NULL; + + switch (errno) { + case -EIO: + errstr = "IO failure"; + break; + case -ENOMEM: + errstr = "Out of memory"; + break; + case -EROFS: + errstr = "Readonly filesystem"; + break; + default: + if (nbuf) { + if (snprintf(nbuf, 16, "error %d", -errno) >= 0) + errstr = nbuf; + } + break; + } + + return errstr; +} + +static void __save_error_info(struct btrfs_fs_info *fs_info) +{ + /* + * today we only save the error info into ram. Long term we'll + * also send it down to the disk + */ + fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; +} + +/* NOTE: + * We move write_super stuff at umount in order to avoid deadlock + * for umount hold all lock. + */ +static void save_error_info(struct btrfs_fs_info *fs_info) +{ + __save_error_info(fs_info); +} + +/* btrfs handle error by forcing the filesystem readonly */ +static void btrfs_handle_error(struct btrfs_fs_info *fs_info) +{ + struct super_block *sb = fs_info->sb; + + if (sb->s_flags & MS_RDONLY) + return; + + if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + sb->s_flags |= MS_RDONLY; + printk(KERN_INFO "btrfs is forced readonly\n"); + } +} + +/* + * __btrfs_std_error decodes expected errors from the caller and + * invokes the approciate error response. + */ +void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, + unsigned int line, int errno) +{ + struct super_block *sb = fs_info->sb; + char nbuf[16]; + const char *errstr; + + /* + * Special case: if the error is EROFS, and we're already + * under MS_RDONLY, then it is safe here. + */ + if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) + return; + + errstr = btrfs_decode_error(fs_info, errno, nbuf); + printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", + sb->s_id, function, line, errstr); + save_error_info(fs_info); + + btrfs_handle_error(fs_info); +} + static void btrfs_put_super(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); @@ -69,9 +153,9 @@ enum { Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, - Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, - Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, - Opt_user_subvol_rm_allowed, + Opt_compress_type, Opt_compress_force, Opt_compress_force_type, + Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, + Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, }; static match_table_t tokens = { @@ -86,7 +170,9 @@ static match_table_t tokens = { {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, {Opt_compress, "compress"}, + {Opt_compress_type, "compress=%s"}, {Opt_compress_force, "compress-force"}, + {Opt_compress_force_type, "compress-force=%s"}, {Opt_ssd, "ssd"}, {Opt_ssd_spread, "ssd_spread"}, {Opt_nossd, "nossd"}, @@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) char *p, *num, *orig; int intarg; int ret = 0; + char *compress_type; + bool compress_force = false; if (!options) return 0; @@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, NODATACOW); btrfs_set_opt(info->mount_opt, NODATASUM); break; - case Opt_compress: - printk(KERN_INFO "btrfs: use compression\n"); - btrfs_set_opt(info->mount_opt, COMPRESS); - break; case Opt_compress_force: - printk(KERN_INFO "btrfs: forcing compression\n"); - btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); + case Opt_compress_force_type: + compress_force = true; + case Opt_compress: + case Opt_compress_type: + if (token == Opt_compress || + token == Opt_compress_force || + strcmp(args[0].from, "zlib") == 0) { + compress_type = "zlib"; + info->compress_type = BTRFS_COMPRESS_ZLIB; + } else if (strcmp(args[0].from, "lzo") == 0) { + compress_type = "lzo"; + info->compress_type = BTRFS_COMPRESS_LZO; + } else { + ret = -EINVAL; + goto out; + } + btrfs_set_opt(info->mount_opt, COMPRESS); + if (compress_force) { + btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); + pr_info("btrfs: force %s compression\n", + compress_type); + } else + pr_info("btrfs: use %s compression\n", + compress_type); break; case Opt_ssd: printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); @@ -753,6 +859,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) return 0; } +/* + * The helper to calc the free space on the devices that can be used to store + * file data. + */ +static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_device_info *devices_info; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct btrfs_device *device; + u64 skip_space; + u64 type; + u64 avail_space; + u64 used_space; + u64 min_stripe_size; + int min_stripes = 1; + int i = 0, nr_devices; + int ret; + + nr_devices = fs_info->fs_devices->rw_devices; + BUG_ON(!nr_devices); + + devices_info = kmalloc(sizeof(*devices_info) * nr_devices, + GFP_NOFS); + if (!devices_info) + return -ENOMEM; + + /* calc min stripe number for data space alloction */ + type = btrfs_get_alloc_profile(root, 1); + if (type & BTRFS_BLOCK_GROUP_RAID0) + min_stripes = 2; + else if (type & BTRFS_BLOCK_GROUP_RAID1) + min_stripes = 2; + else if (type & BTRFS_BLOCK_GROUP_RAID10) + min_stripes = 4; + + if (type & BTRFS_BLOCK_GROUP_DUP) + min_stripe_size = 2 * BTRFS_STRIPE_LEN; + else + min_stripe_size = BTRFS_STRIPE_LEN; + + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { + if (!device->in_fs_metadata) + continue; + + avail_space = device->total_bytes - device->bytes_used; + + /* align with stripe_len */ + do_div(avail_space, BTRFS_STRIPE_LEN); + avail_space *= BTRFS_STRIPE_LEN; + + /* + * In order to avoid overwritting the superblock on the drive, + * btrfs starts at an offset of at least 1MB when doing chunk + * allocation. + */ + skip_space = 1024 * 1024; + + /* user can set the offset in fs_info->alloc_start. */ + if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= + device->total_bytes) + skip_space = max(fs_info->alloc_start, skip_space); + + /* + * btrfs can not use the free space in [0, skip_space - 1], + * we must subtract it from the total. In order to implement + * it, we account the used space in this range first. + */ + ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, + &used_space); + if (ret) { + kfree(devices_info); + return ret; + } + + /* calc the free space in [0, skip_space - 1] */ + skip_space -= used_space; + + /* + * we can use the free space in [0, skip_space - 1], subtract + * it from the total. + */ + if (avail_space && avail_space >= skip_space) + avail_space -= skip_space; + else + avail_space = 0; + + if (avail_space < min_stripe_size) + continue; + + devices_info[i].dev = device; + devices_info[i].max_avail = avail_space; + + i++; + } + + nr_devices = i; + + btrfs_descending_sort_devices(devices_info, nr_devices); + + i = nr_devices - 1; + avail_space = 0; + while (nr_devices >= min_stripes) { + if (devices_info[i].max_avail >= min_stripe_size) { + int j; + u64 alloc_size; + + avail_space += devices_info[i].max_avail * min_stripes; + alloc_size = devices_info[i].max_avail; + for (j = i + 1 - min_stripes; j <= i; j++) + devices_info[j].max_avail -= alloc_size; + } + i--; + nr_devices--; + } + + kfree(devices_info); + *free_bytes = avail_space; + return 0; +} + static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); @@ -760,17 +987,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct list_head *head = &root->fs_info->space_info; struct btrfs_space_info *found; u64 total_used = 0; - u64 total_used_data = 0; + u64 total_free_data = 0; int bits = dentry->d_sb->s_blocksize_bits; __be32 *fsid = (__be32 *)root->fs_info->fsid; + int ret; + /* holding chunk_muext to avoid allocating new chunks */ + mutex_lock(&root->fs_info->chunk_mutex); rcu_read_lock(); list_for_each_entry_rcu(found, head, list) { - if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | - BTRFS_BLOCK_GROUP_SYSTEM)) - total_used_data += found->disk_total; - else - total_used_data += found->disk_used; + if (found->flags & BTRFS_BLOCK_GROUP_DATA) { + total_free_data += found->disk_total - found->disk_used; + total_free_data -= + btrfs_account_ro_block_groups_free_space(found); + } + total_used += found->disk_used; } rcu_read_unlock(); @@ -778,9 +1009,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = BTRFS_NAME_LEN; buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; buf->f_bfree = buf->f_blocks - (total_used >> bits); - buf->f_bavail = buf->f_blocks - (total_used_data >> bits); buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; + buf->f_bavail = total_free_data; + ret = btrfs_calc_avail_data_space(root, &total_free_data); + if (ret) { + mutex_unlock(&root->fs_info->chunk_mutex); + return ret; + } + buf->f_bavail += total_free_data; + buf->f_bavail = buf->f_bavail >> bits; + mutex_unlock(&root->fs_info->chunk_mutex); /* We treat it as constant endianness (it doesn't matter _which_) because we want the fsid to come out the same whether mounted @@ -897,10 +1136,14 @@ static int __init init_btrfs_fs(void) if (err) return err; - err = btrfs_init_cachep(); + err = btrfs_init_compress(); if (err) goto free_sysfs; + err = btrfs_init_cachep(); + if (err) + goto free_compress; + err = extent_io_init(); if (err) goto free_cachep; @@ -928,6 +1171,8 @@ free_extent_io: extent_io_exit(); free_cachep: btrfs_destroy_cachep(); +free_compress: + btrfs_exit_compress(); free_sysfs: btrfs_exit_sysfs(); return err; @@ -942,7 +1187,7 @@ static void __exit exit_btrfs_fs(void) unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); btrfs_cleanup_fs_uuids(); - btrfs_zlib_exit(); + btrfs_exit_compress(); } module_init(init_btrfs_fs) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f50e931fc217..bae5c7b8bbe2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; int ret; + + if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) + return ERR_PTR(-EROFS); again: h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); if (!h) @@ -910,6 +913,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, u64 to_reserve = 0; u64 index = 0; u64 objectid; + u64 root_flags; new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { @@ -967,6 +971,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_set_root_last_snapshot(&root->root_item, trans->transid); memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); + root_flags = btrfs_root_flags(new_root_item); + if (pending->readonly) + root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; + else + root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; + btrfs_set_root_flags(new_root_item, root_flags); + old = btrfs_lock_root_node(root); btrfs_cow_block(trans, root, old, NULL, 0, &old); btrfs_set_lock_blocking(old); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f104b57ad4ef..229a594cacd5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -62,6 +62,7 @@ struct btrfs_pending_snapshot { struct btrfs_block_rsv block_rsv; /* extra metadata reseration for relocation */ int error; + bool readonly; struct list_head list; }; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1718e1a5c320..d158530233b7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -22,6 +22,7 @@ #include <linux/blkdev.h> #include <linux/random.h> #include <linux/iocontext.h> +#include <linux/capability.h> #include <asm/div64.h> #include "compat.h" #include "ctree.h" @@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, set_blocksize(bdev, 4096); bh = btrfs_read_dev_super(bdev); - if (!bh) + if (!bh) { + ret = -EINVAL; goto error_close; + } disk_super = (struct btrfs_super_block *)bh->b_data; devid = btrfs_stack_device_id(&disk_super->dev_item); @@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, goto error_close; bh = btrfs_read_dev_super(bdev); if (!bh) { - ret = -EIO; + ret = -EINVAL; goto error_close; } disk_super = (struct btrfs_super_block *)bh->b_data; @@ -729,59 +732,167 @@ error: return ret; } +/* helper to account the used device space in the range */ +int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, + u64 end, u64 *length) +{ + struct btrfs_key key; + struct btrfs_root *root = device->dev_root; + struct btrfs_dev_extent *dev_extent; + struct btrfs_path *path; + u64 extent_end; + int ret; + int slot; + struct extent_buffer *l; + + *length = 0; + + if (start >= device->total_bytes) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = 2; + + key.objectid = device->devid; + key.offset = start; + key.type = BTRFS_DEV_EXTENT_KEY; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = btrfs_previous_item(root, path, key.objectid, key.type); + if (ret < 0) + goto out; + } + + while (1) { + l = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(l)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto out; + + break; + } + btrfs_item_key_to_cpu(l, &key, slot); + + if (key.objectid < device->devid) + goto next; + + if (key.objectid > device->devid) + break; + + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) + goto next; + + dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); + extent_end = key.offset + btrfs_dev_extent_length(l, + dev_extent); + if (key.offset <= start && extent_end > end) { + *length = end - start + 1; + break; + } else if (key.offset <= start && extent_end > start) + *length += extent_end - start; + else if (key.offset > start && extent_end <= end) + *length += extent_end - key.offset; + else if (key.offset > start && key.offset <= end) { + *length += end - key.offset + 1; + break; + } else if (key.offset > end) + break; + +next: + path->slots[0]++; + } + ret = 0; +out: + btrfs_free_path(path); + return ret; +} + /* + * find_free_dev_extent - find free space in the specified device + * @trans: transaction handler + * @device: the device which we search the free space in + * @num_bytes: the size of the free space that we need + * @start: store the start of the free space. + * @len: the size of the free space. that we find, or the size of the max + * free space if we don't find suitable free space + * * this uses a pretty simple search, the expectation is that it is * called very infrequently and that a given device has a small number * of extents + * + * @start is used to store the start of the free space if we find. But if we + * don't find suitable free space, it will be used to store the start position + * of the max free space. + * + * @len is used to store the size of the free space that we find. + * But if we don't find suitable free space, it is used to store the size of + * the max free space. */ int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, - u64 *start, u64 *max_avail) + u64 *start, u64 *len) { struct btrfs_key key; struct btrfs_root *root = device->dev_root; - struct btrfs_dev_extent *dev_extent = NULL; + struct btrfs_dev_extent *dev_extent; struct btrfs_path *path; - u64 hole_size = 0; - u64 last_byte = 0; - u64 search_start = 0; + u64 hole_size; + u64 max_hole_start; + u64 max_hole_size; + u64 extent_end; + u64 search_start; u64 search_end = device->total_bytes; int ret; - int slot = 0; - int start_found; + int slot; struct extent_buffer *l; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - path->reada = 2; - start_found = 0; - /* FIXME use last free of some kind */ /* we don't want to overwrite the superblock on the drive, * so we make sure to start at an offset of at least 1MB */ - search_start = max((u64)1024 * 1024, search_start); + search_start = 1024 * 1024; - if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) + if (root->fs_info->alloc_start + num_bytes <= search_end) search_start = max(root->fs_info->alloc_start, search_start); + max_hole_start = search_start; + max_hole_size = 0; + + if (search_start >= search_end) { + ret = -ENOSPC; + goto error; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto error; + } + path->reada = 2; + key.objectid = device->devid; key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; + ret = btrfs_search_slot(trans, root, &key, path, 0, 0); if (ret < 0) - goto error; + goto out; if (ret > 0) { ret = btrfs_previous_item(root, path, key.objectid, key.type); if (ret < 0) - goto error; - if (ret > 0) - start_found = 1; + goto out; } - l = path->nodes[0]; - btrfs_item_key_to_cpu(l, &key, path->slots[0]); + while (1) { l = path->nodes[0]; slot = path->slots[0]; @@ -790,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, if (ret == 0) continue; if (ret < 0) - goto error; -no_more_items: - if (!start_found) { - if (search_start >= search_end) { - ret = -ENOSPC; - goto error; - } - *start = search_start; - start_found = 1; - goto check_pending; - } - *start = last_byte > search_start ? - last_byte : search_start; - if (search_end <= *start) { - ret = -ENOSPC; - goto error; - } - goto check_pending; + goto out; + + break; } btrfs_item_key_to_cpu(l, &key, slot); @@ -815,48 +911,62 @@ no_more_items: goto next; if (key.objectid > device->devid) - goto no_more_items; + break; - if (key.offset >= search_start && key.offset > last_byte && - start_found) { - if (last_byte < search_start) - last_byte = search_start; - hole_size = key.offset - last_byte; + if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) + goto next; - if (hole_size > *max_avail) - *max_avail = hole_size; + if (key.offset > search_start) { + hole_size = key.offset - search_start; - if (key.offset > last_byte && - hole_size >= num_bytes) { - *start = last_byte; - goto check_pending; + if (hole_size > max_hole_size) { + max_hole_start = search_start; + max_hole_size = hole_size; + } + + /* + * If this free space is greater than which we need, + * it must be the max free space that we have found + * until now, so max_hole_start must point to the start + * of this free space and the length of this free space + * is stored in max_hole_size. Thus, we return + * max_hole_start and max_hole_size and go back to the + * caller. + */ + if (hole_size >= num_bytes) { + ret = 0; + goto out; } } - if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) - goto next; - start_found = 1; dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); - last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); + extent_end = key.offset + btrfs_dev_extent_length(l, + dev_extent); + if (extent_end > search_start) + search_start = extent_end; next: path->slots[0]++; cond_resched(); } -check_pending: - /* we have to make sure we didn't find an extent that has already - * been allocated by the map tree or the original allocation - */ - BUG_ON(*start < search_start); - if (*start + num_bytes > search_end) { - ret = -ENOSPC; - goto error; + hole_size = search_end- search_start; + if (hole_size > max_hole_size) { + max_hole_start = search_start; + max_hole_size = hole_size; } - /* check for pending inserts here */ - ret = 0; -error: + /* See above. */ + if (hole_size < num_bytes) + ret = -ENOSPC; + else + ret = 0; + +out: btrfs_free_path(path); +error: + *start = max_hole_start; + if (len) + *len = max_hole_size; return ret; } @@ -1196,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) set_blocksize(bdev, 4096); bh = btrfs_read_dev_super(bdev); if (!bh) { - ret = -EIO; + ret = -EINVAL; goto error_close; } disk_super = (struct btrfs_super_block *)bh->b_data; @@ -1916,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root) if (dev_root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + mutex_lock(&dev_root->fs_info->volume_mutex); dev_root = dev_root->fs_info->dev_root; @@ -2154,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, return calc_size * num_stripes; } -static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, - struct map_lookup **map_ret, - u64 *num_bytes, u64 *stripe_size, - u64 start, u64 type) +/* Used to sort the devices by max_avail(descending sort) */ +int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) { - struct btrfs_fs_info *info = extent_root->fs_info; - struct btrfs_device *device = NULL; - struct btrfs_fs_devices *fs_devices = info->fs_devices; - struct list_head *cur; - struct map_lookup *map = NULL; - struct extent_map_tree *em_tree; - struct extent_map *em; - struct list_head private_devs; - int min_stripe_size = 1 * 1024 * 1024; - u64 calc_size = 1024 * 1024 * 1024; - u64 max_chunk_size = calc_size; - u64 min_free; - u64 avail; - u64 max_avail = 0; - u64 dev_offset; - int num_stripes = 1; - int min_stripes = 1; - int sub_stripes = 0; - int looped = 0; - int ret; - int index; - int stripe_len = 64 * 1024; + if (((struct btrfs_device_info *)dev_info1)->max_avail > + ((struct btrfs_device_info *)dev_info2)->max_avail) + return -1; + else if (((struct btrfs_device_info *)dev_info1)->max_avail < + ((struct btrfs_device_info *)dev_info2)->max_avail) + return 1; + else + return 0; +} - if ((type & BTRFS_BLOCK_GROUP_RAID1) && - (type & BTRFS_BLOCK_GROUP_DUP)) { - WARN_ON(1); - type &= ~BTRFS_BLOCK_GROUP_DUP; - } - if (list_empty(&fs_devices->alloc_list)) - return -ENOSPC; +static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, + int *num_stripes, int *min_stripes, + int *sub_stripes) +{ + *num_stripes = 1; + *min_stripes = 1; + *sub_stripes = 0; if (type & (BTRFS_BLOCK_GROUP_RAID0)) { - num_stripes = fs_devices->rw_devices; - min_stripes = 2; + *num_stripes = fs_devices->rw_devices; + *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_DUP)) { - num_stripes = 2; - min_stripes = 2; + *num_stripes = 2; + *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { if (fs_devices->rw_devices < 2) return -ENOSPC; - num_stripes = 2; - min_stripes = 2; + *num_stripes = 2; + *min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { - num_stripes = fs_devices->rw_devices; - if (num_stripes < 4) + *num_stripes = fs_devices->rw_devices; + if (*num_stripes < 4) return -ENOSPC; - num_stripes &= ~(u32)1; - sub_stripes = 2; - min_stripes = 4; + *num_stripes &= ~(u32)1; + *sub_stripes = 2; + *min_stripes = 4; } + return 0; +} + +static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, + u64 proposed_size, u64 type, + int num_stripes, int small_stripe) +{ + int min_stripe_size = 1 * 1024 * 1024; + u64 calc_size = proposed_size; + u64 max_chunk_size = calc_size; + int ncopies = 1; + + if (type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID10)) + ncopies = 2; + if (type & BTRFS_BLOCK_GROUP_DATA) { max_chunk_size = 10 * calc_size; min_stripe_size = 64 * 1024 * 1024; @@ -2230,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), max_chunk_size); -again: - max_avail = 0; - if (!map || map->num_stripes != num_stripes) { - kfree(map); - map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); - if (!map) - return -ENOMEM; - map->num_stripes = num_stripes; - } - - if (calc_size * num_stripes > max_chunk_size) { - calc_size = max_chunk_size; + if (calc_size * num_stripes > max_chunk_size * ncopies) { + calc_size = max_chunk_size * ncopies; do_div(calc_size, num_stripes); - do_div(calc_size, stripe_len); - calc_size *= stripe_len; + do_div(calc_size, BTRFS_STRIPE_LEN); + calc_size *= BTRFS_STRIPE_LEN; } /* we don't want tiny stripes */ - if (!looped) + if (!small_stripe) calc_size = max_t(u64, min_stripe_size, calc_size); /* - * we're about to do_div by the stripe_len so lets make sure + * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure * we end up with something bigger than a stripe */ - calc_size = max_t(u64, calc_size, stripe_len * 4); + calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); + + do_div(calc_size, BTRFS_STRIPE_LEN); + calc_size *= BTRFS_STRIPE_LEN; + + return calc_size; +} + +static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, + int num_stripes) +{ + struct map_lookup *new; + size_t len = map_lookup_size(num_stripes); + + BUG_ON(map->num_stripes < num_stripes); + + if (map->num_stripes == num_stripes) + return map; + + new = kmalloc(len, GFP_NOFS); + if (!new) { + /* just change map->num_stripes */ + map->num_stripes = num_stripes; + return map; + } + + memcpy(new, map, len); + new->num_stripes = num_stripes; + kfree(map); + return new; +} + +/* + * helper to allocate device space from btrfs_device_info, in which we stored + * max free space information of every device. It is used when we can not + * allocate chunks by default size. + * + * By this helper, we can allocate a new chunk as larger as possible. + */ +static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, + struct btrfs_fs_devices *fs_devices, + struct btrfs_device_info *devices, + int nr_device, u64 type, + struct map_lookup **map_lookup, + int min_stripes, u64 *stripe_size) +{ + int i, index, sort_again = 0; + int min_devices = min_stripes; + u64 max_avail, min_free; + struct map_lookup *map = *map_lookup; + int ret; + + if (nr_device < min_stripes) + return -ENOSPC; + + btrfs_descending_sort_devices(devices, nr_device); + + max_avail = devices[0].max_avail; + if (!max_avail) + return -ENOSPC; + + for (i = 0; i < nr_device; i++) { + /* + * if dev_offset = 0, it means the free space of this device + * is less than what we need, and we didn't search max avail + * extent on this device, so do it now. + */ + if (!devices[i].dev_offset) { + ret = find_free_dev_extent(trans, devices[i].dev, + max_avail, + &devices[i].dev_offset, + &devices[i].max_avail); + if (ret != 0 && ret != -ENOSPC) + return ret; + sort_again = 1; + } + } + + /* we update the max avail free extent of each devices, sort again */ + if (sort_again) + btrfs_descending_sort_devices(devices, nr_device); + + if (type & BTRFS_BLOCK_GROUP_DUP) + min_devices = 1; + + if (!devices[min_devices - 1].max_avail) + return -ENOSPC; + + max_avail = devices[min_devices - 1].max_avail; + if (type & BTRFS_BLOCK_GROUP_DUP) + do_div(max_avail, 2); + + max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, + min_stripes, 1); + if (type & BTRFS_BLOCK_GROUP_DUP) + min_free = max_avail * 2; + else + min_free = max_avail; + + if (min_free > devices[min_devices - 1].max_avail) + return -ENOSPC; + + map = __shrink_map_lookup_stripes(map, min_stripes); + *stripe_size = max_avail; + + index = 0; + for (i = 0; i < min_stripes; i++) { + map->stripes[i].dev = devices[index].dev; + map->stripes[i].physical = devices[index].dev_offset; + if (type & BTRFS_BLOCK_GROUP_DUP) { + i++; + map->stripes[i].dev = devices[index].dev; + map->stripes[i].physical = devices[index].dev_offset + + max_avail; + } + index++; + } + *map_lookup = map; + + return 0; +} - do_div(calc_size, stripe_len); - calc_size *= stripe_len; +static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct map_lookup **map_ret, + u64 *num_bytes, u64 *stripe_size, + u64 start, u64 type) +{ + struct btrfs_fs_info *info = extent_root->fs_info; + struct btrfs_device *device = NULL; + struct btrfs_fs_devices *fs_devices = info->fs_devices; + struct list_head *cur; + struct map_lookup *map; + struct extent_map_tree *em_tree; + struct extent_map *em; + struct btrfs_device_info *devices_info; + struct list_head private_devs; + u64 calc_size = 1024 * 1024 * 1024; + u64 min_free; + u64 avail; + u64 dev_offset; + int num_stripes; + int min_stripes; + int sub_stripes; + int min_devices; /* the min number of devices we need */ + int i; + int ret; + int index; + + if ((type & BTRFS_BLOCK_GROUP_RAID1) && + (type & BTRFS_BLOCK_GROUP_DUP)) { + WARN_ON(1); + type &= ~BTRFS_BLOCK_GROUP_DUP; + } + if (list_empty(&fs_devices->alloc_list)) + return -ENOSPC; + + ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, + &min_stripes, &sub_stripes); + if (ret) + return ret; + + devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, + GFP_NOFS); + if (!devices_info) + return -ENOMEM; + + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); + if (!map) { + ret = -ENOMEM; + goto error; + } + map->num_stripes = num_stripes; cur = fs_devices->alloc_list.next; index = 0; + i = 0; - if (type & BTRFS_BLOCK_GROUP_DUP) + calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, + num_stripes, 0); + + if (type & BTRFS_BLOCK_GROUP_DUP) { min_free = calc_size * 2; - else + min_devices = 1; + } else { min_free = calc_size; - - /* - * we add 1MB because we never use the first 1MB of the device, unless - * we've looped, then we are likely allocating the maximum amount of - * space left already - */ - if (!looped) - min_free += 1024 * 1024; + min_devices = min_stripes; + } INIT_LIST_HEAD(&private_devs); while (index < num_stripes) { @@ -2287,27 +2559,39 @@ again: cur = cur->next; if (device->in_fs_metadata && avail >= min_free) { - ret = find_free_dev_extent(trans, device, - min_free, &dev_offset, - &max_avail); + ret = find_free_dev_extent(trans, device, min_free, + &devices_info[i].dev_offset, + &devices_info[i].max_avail); if (ret == 0) { list_move_tail(&device->dev_alloc_list, &private_devs); map->stripes[index].dev = device; - map->stripes[index].physical = dev_offset; + map->stripes[index].physical = + devices_info[i].dev_offset; index++; if (type & BTRFS_BLOCK_GROUP_DUP) { map->stripes[index].dev = device; map->stripes[index].physical = - dev_offset + calc_size; + devices_info[i].dev_offset + + calc_size; index++; } - } - } else if (device->in_fs_metadata && avail > max_avail) - max_avail = avail; + } else if (ret != -ENOSPC) + goto error; + + devices_info[i].dev = device; + i++; + } else if (device->in_fs_metadata && + avail >= BTRFS_STRIPE_LEN) { + devices_info[i].dev = device; + devices_info[i].max_avail = avail; + i++; + } + if (cur == &fs_devices->alloc_list) break; } + list_splice(&private_devs, &fs_devices->alloc_list); if (index < num_stripes) { if (index >= min_stripes) { @@ -2316,34 +2600,36 @@ again: num_stripes /= sub_stripes; num_stripes *= sub_stripes; } - looped = 1; - goto again; - } - if (!looped && max_avail > 0) { - looped = 1; - calc_size = max_avail; - goto again; + + map = __shrink_map_lookup_stripes(map, num_stripes); + } else if (i >= min_devices) { + ret = __btrfs_alloc_tiny_space(trans, fs_devices, + devices_info, i, type, + &map, min_stripes, + &calc_size); + if (ret) + goto error; + } else { + ret = -ENOSPC; + goto error; } - kfree(map); - return -ENOSPC; } map->sector_size = extent_root->sectorsize; - map->stripe_len = stripe_len; - map->io_align = stripe_len; - map->io_width = stripe_len; + map->stripe_len = BTRFS_STRIPE_LEN; + map->io_align = BTRFS_STRIPE_LEN; + map->io_width = BTRFS_STRIPE_LEN; map->type = type; - map->num_stripes = num_stripes; map->sub_stripes = sub_stripes; *map_ret = map; *stripe_size = calc_size; *num_bytes = chunk_bytes_by_type(type, calc_size, - num_stripes, sub_stripes); + map->num_stripes, sub_stripes); em = alloc_extent_map(GFP_NOFS); if (!em) { - kfree(map); - return -ENOMEM; + ret = -ENOMEM; + goto error; } em->bdev = (struct block_device *)map; em->start = start; @@ -2376,7 +2662,13 @@ again: index++; } + kfree(devices_info); return 0; + +error: + kfree(map); + kfree(devices_info); + return ret; } static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1be781079450..7fb59d45fe8c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -20,8 +20,11 @@ #define __BTRFS_VOLUMES_ #include <linux/bio.h> +#include <linux/sort.h> #include "async-thread.h" +#define BTRFS_STRIPE_LEN (64 * 1024) + struct buffer_head; struct btrfs_pending_bios { struct bio *head; @@ -136,6 +139,30 @@ struct btrfs_multi_bio { struct btrfs_bio_stripe stripes[]; }; +struct btrfs_device_info { + struct btrfs_device *dev; + u64 dev_offset; + u64 max_avail; +}; + +/* Used to sort the devices by max_avail(descending sort) */ +int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); + +/* + * sort the devices by max_avail, in which max free extent size of each device + * is stored.(Descending Sort) + */ +static inline void btrfs_descending_sort_devices( + struct btrfs_device_info *devices, + size_t nr_devices) +{ + sort(devices, nr_devices, sizeof(struct btrfs_device_info), + btrfs_cmp_device_free_bytes, NULL); +} + +int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, + u64 end, u64 *length); + #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ (sizeof(struct btrfs_bio_stripe) * (n))) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 698fdd2c739c..a5776531dc2b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { + struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; + + /* + * The permission on security.* and system.* is not checked + * in permission(). + */ + if (btrfs_root_readonly(root)) + return -EROFS; + /* * If this is a request for a synthetic attribute in the system.* * namespace use the generic infrastructure to resolve a handler @@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, int btrfs_removexattr(struct dentry *dentry, const char *name) { + struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; + + /* + * The permission on security.* and system.* is not checked + * in permission(). + */ + if (btrfs_root_readonly(root)) + return -EROFS; + /* * If this is a request for a synthetic attribute in the system.* * namespace use the generic infrastructure to resolve a handler diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b9cd5445f71c..f5ec2d44150d 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -32,15 +32,6 @@ #include <linux/bio.h> #include "compression.h" -/* Plan: call deflate() with avail_in == *sourcelen, - avail_out = *dstlen - 12 and flush == Z_FINISH. - If it doesn't manage to finish, call it again with - avail_in == 0 and avail_out set to the remaining 12 - bytes for it to clean up. - Q: Is 12 bytes sufficient? -*/ -#define STREAM_END_SPACE 12 - struct workspace { z_stream inf_strm; z_stream def_strm; @@ -48,152 +39,51 @@ struct workspace { struct list_head list; }; -static LIST_HEAD(idle_workspace); -static DEFINE_SPINLOCK(workspace_lock); -static unsigned long num_workspace; -static atomic_t alloc_workspace = ATOMIC_INIT(0); -static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); +static void zlib_free_workspace(struct list_head *ws) +{ + struct workspace *workspace = list_entry(ws, struct workspace, list); -/* - * this finds an available zlib workspace or allocates a new one - * NULL or an ERR_PTR is returned if things go bad. - */ -static struct workspace *find_zlib_workspace(void) + vfree(workspace->def_strm.workspace); + vfree(workspace->inf_strm.workspace); + kfree(workspace->buf); + kfree(workspace); +} + +static struct list_head *zlib_alloc_workspace(void) { struct workspace *workspace; - int ret; - int cpus = num_online_cpus(); - -again: - spin_lock(&workspace_lock); - if (!list_empty(&idle_workspace)) { - workspace = list_entry(idle_workspace.next, struct workspace, - list); - list_del(&workspace->list); - num_workspace--; - spin_unlock(&workspace_lock); - return workspace; - } - spin_unlock(&workspace_lock); - if (atomic_read(&alloc_workspace) > cpus) { - DEFINE_WAIT(wait); - prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); - if (atomic_read(&alloc_workspace) > cpus) - schedule(); - finish_wait(&workspace_wait, &wait); - goto again; - } - atomic_inc(&alloc_workspace); workspace = kzalloc(sizeof(*workspace), GFP_NOFS); - if (!workspace) { - ret = -ENOMEM; - goto fail; - } + if (!workspace) + return ERR_PTR(-ENOMEM); workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); - if (!workspace->def_strm.workspace) { - ret = -ENOMEM; - goto fail; - } workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); - if (!workspace->inf_strm.workspace) { - ret = -ENOMEM; - goto fail_inflate; - } workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); - if (!workspace->buf) { - ret = -ENOMEM; - goto fail_kmalloc; - } - return workspace; - -fail_kmalloc: - vfree(workspace->inf_strm.workspace); -fail_inflate: - vfree(workspace->def_strm.workspace); -fail: - kfree(workspace); - atomic_dec(&alloc_workspace); - wake_up(&workspace_wait); - return ERR_PTR(ret); -} - -/* - * put a workspace struct back on the list or free it if we have enough - * idle ones sitting around - */ -static int free_workspace(struct workspace *workspace) -{ - spin_lock(&workspace_lock); - if (num_workspace < num_online_cpus()) { - list_add_tail(&workspace->list, &idle_workspace); - num_workspace++; - spin_unlock(&workspace_lock); - if (waitqueue_active(&workspace_wait)) - wake_up(&workspace_wait); - return 0; - } - spin_unlock(&workspace_lock); - vfree(workspace->def_strm.workspace); - vfree(workspace->inf_strm.workspace); - kfree(workspace->buf); - kfree(workspace); + if (!workspace->def_strm.workspace || + !workspace->inf_strm.workspace || !workspace->buf) + goto fail; - atomic_dec(&alloc_workspace); - if (waitqueue_active(&workspace_wait)) - wake_up(&workspace_wait); - return 0; -} + INIT_LIST_HEAD(&workspace->list); -/* - * cleanup function for module exit - */ -static void free_workspaces(void) -{ - struct workspace *workspace; - while (!list_empty(&idle_workspace)) { - workspace = list_entry(idle_workspace.next, struct workspace, - list); - list_del(&workspace->list); - vfree(workspace->def_strm.workspace); - vfree(workspace->inf_strm.workspace); - kfree(workspace->buf); - kfree(workspace); - atomic_dec(&alloc_workspace); - } + return &workspace->list; +fail: + zlib_free_workspace(&workspace->list); + return ERR_PTR(-ENOMEM); } -/* - * given an address space and start/len, compress the bytes. - * - * pages are allocated to hold the compressed result and stored - * in 'pages' - * - * out_pages is used to return the number of pages allocated. There - * may be pages allocated even if we return an error - * - * total_in is used to return the number of bytes actually read. It - * may be smaller then len if we had to exit early because we - * ran out of room in the pages array or because we cross the - * max_out threshold. - * - * total_out is used to return the total number of compressed bytes - * - * max_out tells us the max number of bytes that we're allowed to - * stuff into pages - */ -int btrfs_zlib_compress_pages(struct address_space *mapping, - u64 start, unsigned long len, - struct page **pages, - unsigned long nr_dest_pages, - unsigned long *out_pages, - unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) +static int zlib_compress_pages(struct list_head *ws, + struct address_space *mapping, + u64 start, unsigned long len, + struct page **pages, + unsigned long nr_dest_pages, + unsigned long *out_pages, + unsigned long *total_in, + unsigned long *total_out, + unsigned long max_out) { + struct workspace *workspace = list_entry(ws, struct workspace, list); int ret; - struct workspace *workspace; char *data_in; char *cpage_out; int nr_pages = 0; @@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, *total_out = 0; *total_in = 0; - workspace = find_zlib_workspace(); - if (IS_ERR(workspace)) - return -1; - if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { printk(KERN_WARNING "deflateInit failed\n"); ret = -1; @@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, data_in = kmap(in_page); out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -1; + goto out; + } cpage_out = kmap(out_page); pages[0] = out_page; nr_pages = 1; @@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, goto out; } out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -1; + goto out; + } cpage_out = kmap(out_page); pages[nr_pages] = out_page; nr_pages++; @@ -314,55 +208,26 @@ out: kunmap(in_page); page_cache_release(in_page); } - free_workspace(workspace); return ret; } -/* - * pages_in is an array of pages with compressed data. - * - * disk_start is the starting logical offset of this array in the file - * - * bvec is a bio_vec of pages from the file that we want to decompress into - * - * vcnt is the count of pages in the biovec - * - * srclen is the number of bytes in pages_in - * - * The basic idea is that we have a bio that was created by readpages. - * The pages in the bio are for the uncompressed data, and they may not - * be contiguous. They all correspond to the range of bytes covered by - * the compressed extent. - */ -int btrfs_zlib_decompress_biovec(struct page **pages_in, - u64 disk_start, - struct bio_vec *bvec, - int vcnt, - size_t srclen) +static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, + u64 disk_start, + struct bio_vec *bvec, + int vcnt, + size_t srclen) { - int ret = 0; + struct workspace *workspace = list_entry(ws, struct workspace, list); + int ret = 0, ret2; int wbits = MAX_WBITS; - struct workspace *workspace; char *data_in; size_t total_out = 0; - unsigned long page_bytes_left; unsigned long page_in_index = 0; unsigned long page_out_index = 0; - struct page *page_out; unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE; unsigned long buf_start; - unsigned long buf_offset; - unsigned long bytes; - unsigned long working_bytes; unsigned long pg_offset; - unsigned long start_byte; - unsigned long current_buf_start; - char *kaddr; - - workspace = find_zlib_workspace(); - if (IS_ERR(workspace)) - return -ENOMEM; data_in = kmap(pages_in[page_in_index]); workspace->inf_strm.next_in = data_in; @@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, workspace->inf_strm.total_out = 0; workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; - page_out = bvec[page_out_index].bv_page; - page_bytes_left = PAGE_CACHE_SIZE; pg_offset = 0; /* If it's deflate, and it's got no preset dictionary, then @@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { printk(KERN_WARNING "inflateInit failed\n"); - ret = -1; - goto out; + return -1; } while (workspace->inf_strm.total_in < srclen) { ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); if (ret != Z_OK && ret != Z_STREAM_END) break; - /* - * buf start is the byte offset we're of the start of - * our workspace buffer - */ - buf_start = total_out; - /* total_out is the last byte of the workspace buffer */ + buf_start = total_out; total_out = workspace->inf_strm.total_out; - working_bytes = total_out - buf_start; - - /* - * start byte is the first byte of the page we're currently - * copying into relative to the start of the compressed data. - */ - start_byte = page_offset(page_out) - disk_start; - - if (working_bytes == 0) { - /* we didn't make progress in this inflate - * call, we're done - */ - if (ret != Z_STREAM_END) - ret = -1; + /* we didn't make progress in this inflate call, we're done */ + if (buf_start == total_out) break; - } - /* we haven't yet hit data corresponding to this page */ - if (total_out <= start_byte) - goto next; - - /* - * the start of the data we care about is offset into - * the middle of our working buffer - */ - if (total_out > start_byte && buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes -= buf_offset; - } else { - buf_offset = 0; - } - current_buf_start = buf_start; - - /* copy bytes from the working buffer into the pages */ - while (working_bytes > 0) { - bytes = min(PAGE_CACHE_SIZE - pg_offset, - PAGE_CACHE_SIZE - buf_offset); - bytes = min(bytes, working_bytes); - kaddr = kmap_atomic(page_out, KM_USER0); - memcpy(kaddr + pg_offset, workspace->buf + buf_offset, - bytes); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page_out); - - pg_offset += bytes; - page_bytes_left -= bytes; - buf_offset += bytes; - working_bytes -= bytes; - current_buf_start += bytes; - - /* check if we need to pick another page */ - if (page_bytes_left == 0) { - page_out_index++; - if (page_out_index >= vcnt) { - ret = 0; - goto done; - } - - page_out = bvec[page_out_index].bv_page; - pg_offset = 0; - page_bytes_left = PAGE_CACHE_SIZE; - start_byte = page_offset(page_out) - disk_start; - - /* - * make sure our new page is covered by this - * working buffer - */ - if (total_out <= start_byte) - goto next; - - /* the next page in the biovec might not - * be adjacent to the last page, but it - * might still be found inside this working - * buffer. bump our offset pointer - */ - if (total_out > start_byte && - current_buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes = total_out - start_byte; - current_buf_start = buf_start + - buf_offset; - } - } + ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, + total_out, disk_start, + bvec, vcnt, + &page_out_index, &pg_offset); + if (ret2 == 0) { + ret = 0; + goto done; } -next: + workspace->inf_strm.next_out = workspace->buf; workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; @@ -516,35 +301,21 @@ done: zlib_inflateEnd(&workspace->inf_strm); if (data_in) kunmap(pages_in[page_in_index]); -out: - free_workspace(workspace); return ret; } -/* - * a less complex decompression routine. Our compressed data fits in a - * single page, and we want to read a single page out of it. - * start_byte tells us the offset into the compressed data we're interested in - */ -int btrfs_zlib_decompress(unsigned char *data_in, - struct page *dest_page, - unsigned long start_byte, - size_t srclen, size_t destlen) +static int zlib_decompress(struct list_head *ws, unsigned char *data_in, + struct page *dest_page, + unsigned long start_byte, + size_t srclen, size_t destlen) { + struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; int wbits = MAX_WBITS; - struct workspace *workspace; unsigned long bytes_left = destlen; unsigned long total_out = 0; char *kaddr; - if (destlen > PAGE_CACHE_SIZE) - return -ENOMEM; - - workspace = find_zlib_workspace(); - if (IS_ERR(workspace)) - return -ENOMEM; - workspace->inf_strm.next_in = data_in; workspace->inf_strm.avail_in = srclen; workspace->inf_strm.total_in = 0; @@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { printk(KERN_WARNING "inflateInit failed\n"); - ret = -1; - goto out; + return -1; } while (bytes_left > 0) { @@ -616,12 +386,13 @@ next: ret = 0; zlib_inflateEnd(&workspace->inf_strm); -out: - free_workspace(workspace); return ret; } -void btrfs_zlib_exit(void) -{ - free_workspaces(); -} +struct btrfs_compress_op btrfs_zlib_compress = { + .alloc_workspace = zlib_alloc_workspace, + .free_workspace = zlib_free_workspace, + .compress_pages = zlib_compress_pages, + .decompress_biovec = zlib_decompress_biovec, + .decompress = zlib_decompress, +}; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a65d311d163a..9f59887badd2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1113,6 +1113,8 @@ cifs_parse_mount_options(char *options, const char *devname, } else if (!strnicmp(data, "uid", 3) && value && *value) { vol->linux_uid = simple_strtoul(value, &value, 0); uid_specified = true; + } else if (!strnicmp(data, "cruid", 5) && value && *value) { + vol->cred_uid = simple_strtoul(value, &value, 0); } else if (!strnicmp(data, "forceuid", 8)) { override_uid = 1; } else if (!strnicmp(data, "noforceuid", 10)) { diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 9aad47a2d62f..6783ce6cdc89 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -899,8 +899,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr) } /* else ERRHRD class errors or junk - return EIO */ - cFYI(1, "Mapping smb error code %d to POSIX err %d", - smberrcode, rc); + cFYI(1, "Mapping smb error code 0x%x to POSIX err %d", + le32_to_cpu(smb->Status.CifsError), rc); /* generic corrective action e.g. reconnect SMB session on * ERRbaduid could be added */ diff --git a/fs/compat.c b/fs/compat.c index eb1740ac8c0a..f6fd0a00e6cc 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -257,7 +257,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * } /* - * The following statfs calls are copies of code from fs/open.c and + * The following statfs calls are copies of code from fs/statfs.c and * should be checked against those from time to time */ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) @@ -320,7 +320,9 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat __put_user(kbuf->f_namelen, &ubuf->f_namelen) || __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || - __put_user(kbuf->f_frsize, &ubuf->f_frsize)) + __put_user(kbuf->f_frsize, &ubuf->f_frsize) || + __put_user(kbuf->f_flags, &ubuf->f_flags) || + __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) return -EFAULT; return 0; } @@ -597,10 +599,8 @@ ssize_t compat_rw_copy_check_uvector(int type, if (nr_segs > fast_segs) { ret = -ENOMEM; iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); - if (iov == NULL) { - *ret_pointer = fast_pointer; + if (iov == NULL) goto out; - } } *ret_pointer = iov; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index cbadc1bee6e7..bfd8b680e648 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -348,7 +348,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, BUG_ON(!crypt_stat || !crypt_stat->tfm || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n", + ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); @@ -413,10 +413,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, rc = ecryptfs_derive_iv(extent_iv, crypt_stat, (extent_base + extent_offset)); if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to " - "derive IV for extent [0x%.16x]; " - "rc = [%d]\n", (extent_base + extent_offset), - rc); + ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " + "extent [0x%.16llx]; rc = [%d]\n", + (unsigned long long)(extent_base + extent_offset), rc); goto out; } if (unlikely(ecryptfs_verbosity > 0)) { @@ -443,9 +442,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, } rc = 0; if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; " - "rc = [%d]\n", (extent_base + extent_offset), - rc); + ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; " + "rc = [%d]\n", + (unsigned long long)(extent_base + extent_offset), rc); ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " "encryption:\n"); ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8); @@ -540,10 +539,9 @@ static int ecryptfs_decrypt_extent(struct page *page, rc = ecryptfs_derive_iv(extent_iv, crypt_stat, (extent_base + extent_offset)); if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to " - "derive IV for extent [0x%.16x]; " - "rc = [%d]\n", (extent_base + extent_offset), - rc); + ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " + "extent [0x%.16llx]; rc = [%d]\n", + (unsigned long long)(extent_base + extent_offset), rc); goto out; } if (unlikely(ecryptfs_verbosity > 0)) { @@ -571,9 +569,9 @@ static int ecryptfs_decrypt_extent(struct page *page, } rc = 0; if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16x]; " - "rc = [%d]\n", (extent_base + extent_offset), - rc); + ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; " + "rc = [%d]\n", + (unsigned long long)(extent_base + extent_offset), rc); ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " "decryption:\n"); ecryptfs_dump_hex((char *)(page_address(page) @@ -780,7 +778,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) } ecryptfs_printk(KERN_DEBUG, "Initializing cipher [%s]; strlen = [%d]; " - "key_size_bits = [%d]\n", + "key_size_bits = [%zd]\n", crypt_stat->cipher, (int)strlen(crypt_stat->cipher), crypt_stat->key_size << 3); if (crypt_stat->tfm) { diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 413a3c48f0bb..dbc84ed96336 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -192,7 +192,6 @@ ecryptfs_get_key_payload_data(struct key *key) (((struct user_key_payload*)key->payload.data)->data); } -#define ECRYPTFS_SUPER_MAGIC 0xf15f #define ECRYPTFS_MAX_KEYSET_SIZE 1024 #define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32 #define ECRYPTFS_MAX_NUM_ENC_KEYS 64 @@ -584,6 +583,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) #define ecryptfs_printk(type, fmt, arg...) \ __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); +__attribute__ ((format(printf, 1, 2))) void __ecryptfs_printk(const char *fmt, ...); extern const struct file_operations ecryptfs_main_fops; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 91da02987bff..81e10e6a9443 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -47,7 +47,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - int rc; + ssize_t rc; struct dentry *lower_dentry; struct vfsmount *lower_vfsmount; struct file *file = iocb->ki_filp; @@ -191,18 +191,16 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | ECRYPTFS_ENCRYPTED); } mutex_unlock(&crypt_stat->cs_mutex); - if (!ecryptfs_inode_to_private(inode)->lower_file) { - rc = ecryptfs_init_persistent_file(ecryptfs_dentry); - if (rc) { - printk(KERN_ERR "%s: Error attempting to initialize " - "the persistent file for the dentry with name " - "[%s]; rc = [%d]\n", __func__, - ecryptfs_dentry->d_name.name, rc); - goto out_free; - } + rc = ecryptfs_init_persistent_file(ecryptfs_dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the persistent file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + ecryptfs_dentry->d_name.name, rc); + goto out_free; } - if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) - && !(file->f_flags & O_RDONLY)) { + if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE) + == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) { rc = -EPERM; printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " "file must hence be opened RO\n", __func__); @@ -243,9 +241,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) } } mutex_unlock(&crypt_stat->cs_mutex); - ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] " - "size: [0x%.16x]\n", inode, inode->i_ino, - i_size_read(inode)); + ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " + "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, + (unsigned long long)i_size_read(inode)); goto out; out_free: kmem_cache_free(ecryptfs_file_info_cache, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 64ff02330752..bd33f87a1907 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -185,15 +185,13 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) "context; rc = [%d]\n", rc); goto out; } - if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) { - rc = ecryptfs_init_persistent_file(ecryptfs_dentry); - if (rc) { - printk(KERN_ERR "%s: Error attempting to initialize " - "the persistent file for the dentry with name " - "[%s]; rc = [%d]\n", __func__, - ecryptfs_dentry->d_name.name, rc); - goto out; - } + rc = ecryptfs_init_persistent_file(ecryptfs_dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the persistent file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + ecryptfs_dentry->d_name.name, rc); + goto out; } rc = ecryptfs_write_metadata(ecryptfs_dentry); if (rc) { @@ -302,15 +300,13 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, rc = -ENOMEM; goto out; } - if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) { - rc = ecryptfs_init_persistent_file(ecryptfs_dentry); - if (rc) { - printk(KERN_ERR "%s: Error attempting to initialize " - "the persistent file for the dentry with name " - "[%s]; rc = [%d]\n", __func__, - ecryptfs_dentry->d_name.name, rc); - goto out_free_kmem; - } + rc = ecryptfs_init_persistent_file(ecryptfs_dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the persistent file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + ecryptfs_dentry->d_name.name, rc); + goto out_free_kmem; } crypt_stat = &ecryptfs_inode_to_private( ecryptfs_dentry->d_inode)->crypt_stat; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index b1f6858a5223..c1436cff6f2d 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -59,7 +59,7 @@ static int process_request_key_err(long err_code) break; default: ecryptfs_printk(KERN_WARNING, "Unknown error code: " - "[0x%.16x]\n", err_code); + "[0x%.16lx]\n", err_code); rc = -EINVAL; } return rc; @@ -130,7 +130,7 @@ int ecryptfs_write_packet_length(char *dest, size_t size, } else { rc = -EINVAL; ecryptfs_printk(KERN_WARNING, - "Unsupported packet size: [%d]\n", size); + "Unsupported packet size: [%zd]\n", size); } return rc; } @@ -1672,7 +1672,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, auth_tok->session_key.decrypted_key_size); crypt_stat->flags |= ECRYPTFS_KEY_VALID; if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "FEK of size [%d]:\n", + ecryptfs_printk(KERN_DEBUG, "FEK of size [%zd]:\n", crypt_stat->key_size); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); @@ -1754,7 +1754,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) { ecryptfs_printk(KERN_ERR, "Expected " "signature of size [%d]; " - "read size [%d]\n", + "read size [%zd]\n", ECRYPTFS_SIG_SIZE, tag_11_contents_size); rc = -EIO; @@ -1787,8 +1787,8 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, goto out_wipe_list; break; default: - ecryptfs_printk(KERN_DEBUG, "No packet at offset " - "[%d] of the file header; hex value of " + ecryptfs_printk(KERN_DEBUG, "No packet at offset [%zd] " + "of the file header; hex value of " "character is [0x%.2x]\n", i, src[i]); next_packet_is_auth_tok_packet = 0; } @@ -1864,8 +1864,8 @@ found_matching_auth_tok: "session key for authentication token with sig " "[%.*s]; rc = [%d]. Removing auth tok " "candidate from the list and searching for " - "the next match.\n", candidate_auth_tok_sig, - ECRYPTFS_SIG_SIZE_HEX, rc); + "the next match.\n", ECRYPTFS_SIG_SIZE_HEX, + candidate_auth_tok_sig, rc); list_for_each_entry_safe(auth_tok_list_item, auth_tok_list_item_tmp, &auth_tok_list, list) { @@ -2168,7 +2168,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, if (encrypted_session_key_valid) { ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; " "using auth_tok->session_key.encrypted_key, " - "where key_rec->enc_key_size = [%d]\n", + "where key_rec->enc_key_size = [%zd]\n", key_rec->enc_key_size); memcpy(key_rec->enc_key, auth_tok->session_key.encrypted_key, @@ -2198,7 +2198,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, if (rc < 1 || rc > 2) { ecryptfs_printk(KERN_ERR, "Error generating scatterlist " "for crypt_stat session key; expected rc = 1; " - "got rc = [%d]. key_rec->enc_key_size = [%d]\n", + "got rc = [%d]. key_rec->enc_key_size = [%zd]\n", rc, key_rec->enc_key_size); rc = -ENOMEM; goto out; @@ -2209,7 +2209,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, ecryptfs_printk(KERN_ERR, "Error generating scatterlist " "for crypt_stat encrypted session key; " "expected rc = 1; got rc = [%d]. " - "key_rec->enc_key_size = [%d]\n", rc, + "key_rec->enc_key_size = [%zd]\n", rc, key_rec->enc_key_size); rc = -ENOMEM; goto out; @@ -2224,7 +2224,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, goto out; } rc = 0; - ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", + ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n", crypt_stat->key_size); rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg, (*key_rec).enc_key_size); @@ -2235,7 +2235,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, } ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); if (ecryptfs_verbosity > 0) { - ecryptfs_printk(KERN_DEBUG, "EFEK of size [%d]:\n", + ecryptfs_printk(KERN_DEBUG, "EFEK of size [%zd]:\n", key_rec->enc_key_size); ecryptfs_dump_hex(key_rec->enc_key, key_rec->enc_key_size); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index d3b28abdd6aa..758323a0f09a 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -36,6 +36,7 @@ #include <linux/parser.h> #include <linux/fs_stack.h> #include <linux/slab.h> +#include <linux/magic.h> #include "ecryptfs_kernel.h" /** @@ -564,6 +565,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags ecryptfs_set_superblock_lower(s, path.dentry->d_sb); s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; + s->s_magic = ECRYPTFS_SUPER_MAGIC; inode = ecryptfs_get_inode(path.dentry->d_inode, s); rc = PTR_ERR(inode); @@ -808,9 +810,10 @@ static int __init ecryptfs_init(void) ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is " "larger than the host's page size, and so " "eCryptfs cannot run on this system. The " - "default eCryptfs extent size is [%d] bytes; " - "the page size is [%d] bytes.\n", - ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE); + "default eCryptfs extent size is [%u] bytes; " + "the page size is [%lu] bytes.\n", + ECRYPTFS_DEFAULT_EXTENT_SIZE, + (unsigned long)PAGE_CACHE_SIZE); goto out; } rc = ecryptfs_init_kmem_caches(); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index b1d82756544b..cc64fca89f8d 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -65,7 +65,7 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting " - "page (upper index [0x%.16x])\n", page->index); + "page (upper index [0x%.16lx])\n", page->index); ClearPageUptodate(page); goto out; } @@ -237,7 +237,7 @@ out: ClearPageUptodate(page); else SetPageUptodate(page); - ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", + ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16lx]\n", page->index); unlock_page(page); return rc; @@ -290,6 +290,7 @@ static int ecryptfs_write_begin(struct file *file, return -ENOMEM; *pagep = page; + prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); if (!PageUptodate(page)) { struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(mapping->host)->crypt_stat; @@ -335,18 +336,23 @@ static int ecryptfs_write_begin(struct file *file, SetPageUptodate(page); } } else { - rc = ecryptfs_decrypt_page(page); - if (rc) { - printk(KERN_ERR "%s: Error decrypting page " - "at index [%ld]; rc = [%d]\n", - __func__, page->index, rc); - ClearPageUptodate(page); - goto out; + if (prev_page_end_size + >= i_size_read(page->mapping->host)) { + zero_user(page, 0, PAGE_CACHE_SIZE); + } else { + rc = ecryptfs_decrypt_page(page); + if (rc) { + printk(KERN_ERR "%s: Error decrypting " + "page at index [%ld]; " + "rc = [%d]\n", + __func__, page->index, rc); + ClearPageUptodate(page); + goto out; + } } SetPageUptodate(page); } } - prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); /* If creating a page or more of holes, zero them out via truncate. * Note, this will increase i_size. */ if (index != 0) { @@ -488,7 +494,7 @@ static int ecryptfs_write_end(struct file *file, } else ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" - "(page w/ index = [0x%.16x], to = [%d])\n", index, to); + "(page w/ index = [0x%.16lx], to = [%d])\n", index, to); if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0, to); @@ -503,19 +509,20 @@ static int ecryptfs_write_end(struct file *file, rc = fill_zeros_to_end_of_page(page, to); if (rc) { ecryptfs_printk(KERN_WARNING, "Error attempting to fill " - "zeros in page with index = [0x%.16x]\n", index); + "zeros in page with index = [0x%.16lx]\n", index); goto out; } rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " - "index [0x%.16x])\n", index); + "index [0x%.16lx])\n", index); goto out; } if (pos + copied > i_size_read(ecryptfs_inode)) { i_size_write(ecryptfs_inode, pos + copied); ecryptfs_printk(KERN_DEBUG, "Expanded file size to " - "[0x%.16x]\n", i_size_read(ecryptfs_inode)); + "[0x%.16llx]\n", + (unsigned long long)i_size_read(ecryptfs_inode)); } rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); if (rc) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1de65f572033..0c8d97b56f34 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2065,7 +2065,7 @@ extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, extern void ext4_ext_truncate(struct inode *); extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); -extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, +extern long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, ssize_t len); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c4068f6abf03..63a75810b7c3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3627,14 +3627,15 @@ static void ext4_falloc_update_inode(struct inode *inode, } /* - * preallocate space for a file. This implements ext4's fallocate inode + * preallocate space for a file. This implements ext4's fallocate file * operation, which gets called from sys_fallocate system call. * For block-mapped files, posix_fallocate should fall back to the method * of writing zeroes to the required new blocks (the same behavior which is * expected for file systems which do not support fallocate() system call). */ -long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) +long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { + struct inode *inode = file->f_path.dentry->d_inode; handle_t *handle; loff_t new_size; unsigned int max_blocks; @@ -3645,7 +3646,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) unsigned int credits, blkbits = inode->i_blkbits; /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode && (mode != FALLOC_FL_KEEP_SIZE)) + if (mode & ~FALLOC_FL_KEEP_SIZE) return -EOPNOTSUPP; /* @@ -3655,10 +3656,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EOPNOTSUPP; - /* preallocation to directories is currently not supported */ - if (S_ISDIR(inode->i_mode)) - return -ENODEV; - map.m_lblk = offset >> blkbits; /* * We can't just convert len to max_blocks because diff --git a/fs/ext4/file.c b/fs/ext4/file.c index bb003dc9ffff..2e8322c8aa88 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -210,6 +210,7 @@ const struct file_operations ext4_file_operations = { .fsync = ext4_sync_file, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, + .fallocate = ext4_fallocate, }; const struct inode_operations ext4_file_inode_operations = { @@ -223,7 +224,6 @@ const struct inode_operations ext4_file_inode_operations = { .removexattr = generic_removexattr, #endif .check_acl = ext4_check_acl, - .fallocate = ext4_fallocate, .fiemap = ext4_fiemap, }; diff --git a/fs/file_table.c b/fs/file_table.c index c3dee381f1b4..c3e89adf53c0 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -311,7 +311,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) struct files_struct *files = current->files; *fput_needed = 0; - if (likely((atomic_read(&files->count) == 1))) { + if (atomic_read(&files->count) == 1) { file = fcheck_files(files, fd); } else { rcu_read_lock(); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index fca6689e12e6..7cfdcb913363 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -19,6 +19,8 @@ #include <linux/fs.h> #include <linux/gfs2_ondisk.h> #include <linux/ext2_fs.h> +#include <linux/falloc.h> +#include <linux/swap.h> #include <linux/crc32.h> #include <linux/writeback.h> #include <asm/uaccess.h> @@ -610,6 +612,260 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return generic_file_aio_write(iocb, iov, nr_segs, pos); } +static void empty_write_end(struct page *page, unsigned from, + unsigned to) +{ + struct gfs2_inode *ip = GFS2_I(page->mapping->host); + + page_zero_new_buffers(page, from, to); + flush_dcache_page(page); + mark_page_accessed(page); + + if (!gfs2_is_writeback(ip)) + gfs2_page_add_databufs(ip, page, from, to); + + block_commit_write(page, from, to); +} + +static int write_empty_blocks(struct page *page, unsigned from, unsigned to) +{ + unsigned start, end, next; + struct buffer_head *bh, *head; + int error; + + if (!page_has_buffers(page)) { + error = __block_write_begin(page, from, to - from, gfs2_block_map); + if (unlikely(error)) + return error; + + empty_write_end(page, from, to); + return 0; + } + + bh = head = page_buffers(page); + next = end = 0; + while (next < from) { + next += bh->b_size; + bh = bh->b_this_page; + } + start = next; + do { + next += bh->b_size; + if (buffer_mapped(bh)) { + if (end) { + error = __block_write_begin(page, start, end - start, + gfs2_block_map); + if (unlikely(error)) + return error; + empty_write_end(page, start, end); + end = 0; + } + start = next; + } + else + end = next; + bh = bh->b_this_page; + } while (next < to); + + if (end) { + error = __block_write_begin(page, start, end - start, gfs2_block_map); + if (unlikely(error)) + return error; + empty_write_end(page, start, end); + } + + return 0; +} + +static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, + int mode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct buffer_head *dibh; + int error; + u64 start = offset >> PAGE_CACHE_SHIFT; + unsigned int start_offset = offset & ~PAGE_CACHE_MASK; + u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + pgoff_t curr; + struct page *page; + unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; + unsigned int from, to; + + if (!end_offset) + end_offset = PAGE_CACHE_SIZE; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (unlikely(error)) + goto out; + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, NULL); + if (unlikely(error)) + goto out; + } + + curr = start; + offset = start << PAGE_CACHE_SHIFT; + from = start_offset; + to = PAGE_CACHE_SIZE; + while (curr <= end) { + page = grab_cache_page_write_begin(inode->i_mapping, curr, + AOP_FLAG_NOFS); + if (unlikely(!page)) { + error = -ENOMEM; + goto out; + } + + if (curr == end) + to = end_offset; + error = write_empty_blocks(page, from, to); + if (!error && offset + to > inode->i_size && + !(mode & FALLOC_FL_KEEP_SIZE)) { + i_size_write(inode, offset + to); + } + unlock_page(page); + page_cache_release(page); + if (error) + goto out; + curr++; + offset += PAGE_CACHE_SIZE; + from = 0; + } + + gfs2_dinode_out(ip, dibh->b_data); + mark_inode_dirty(inode); + + brelse(dibh); + +out: + return error; +} + +static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, + unsigned int *data_blocks, unsigned int *ind_blocks) +{ + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; + unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); + + for (tmp = max_data; tmp > sdp->sd_diptrs;) { + tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); + max_data -= tmp; + } + /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, + so it might end up with fewer data blocks */ + if (max_data <= *data_blocks) + return; + *data_blocks = max_data; + *ind_blocks = max_blocks - max_data; + *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; + if (*len > max) { + *len = max; + gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); + } +} + +static long gfs2_fallocate(struct file *file, int mode, loff_t offset, + loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_inode *ip = GFS2_I(inode); + unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + loff_t bytes, max_bytes; + struct gfs2_alloc *al; + int error; + loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; + next = (next + 1) << sdp->sd_sb.sb_bsize_shift; + + /* We only support the FALLOC_FL_KEEP_SIZE mode */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + offset = (offset >> sdp->sd_sb.sb_bsize_shift) << + sdp->sd_sb.sb_bsize_shift; + + len = next - offset; + bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; + if (!bytes) + bytes = UINT_MAX; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); + error = gfs2_glock_nq(&ip->i_gh); + if (unlikely(error)) + goto out_uninit; + + if (!gfs2_write_alloc_required(ip, offset, len)) + goto out_unlock; + + while (len > 0) { + if (len < bytes) + bytes = len; + al = gfs2_alloc_get(ip); + if (!al) { + error = -ENOMEM; + goto out_unlock; + } + + error = gfs2_quota_lock_check(ip); + if (error) + goto out_alloc_put; + +retry: + gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); + + al->al_requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip); + if (error) { + if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { + bytes >>= 1; + goto retry; + } + goto out_qunlock; + } + max_bytes = bytes; + calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); + al->al_requested = data_blocks + ind_blocks; + + rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + + RES_RG_HDR + gfs2_rg_blocks(al); + if (gfs2_is_jdata(ip)) + rblocks += data_blocks ? data_blocks : 1; + + error = gfs2_trans_begin(sdp, rblocks, + PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); + if (error) + goto out_trans_fail; + + error = fallocate_chunk(inode, offset, max_bytes, mode); + gfs2_trans_end(sdp); + + if (error) + goto out_trans_fail; + + len -= max_bytes; + offset += max_bytes; + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + } + goto out_unlock; + +out_trans_fail: + gfs2_inplace_release(ip); +out_qunlock: + gfs2_quota_unlock(ip); +out_alloc_put: + gfs2_alloc_put(ip); +out_unlock: + gfs2_glock_dq(&ip->i_gh); +out_uninit: + gfs2_holder_uninit(&ip->i_gh); + return error; +} + #ifdef CONFIG_GFS2_FS_LOCKING_DLM /** @@ -765,6 +1021,7 @@ const struct file_operations gfs2_file_fops = { .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .setlease = gfs2_setlease, + .fallocate = gfs2_fallocate, }; const struct file_operations gfs2_dir_fops = { @@ -794,6 +1051,7 @@ const struct file_operations gfs2_file_fops_nolock = { .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, .setlease = generic_setlease, + .fallocate = gfs2_fallocate, }; const struct file_operations gfs2_dir_fops_nolock = { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 040b5a2e6556..d8b26ac2e20b 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -18,8 +18,6 @@ #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include <linux/fiemap.h> -#include <linux/swap.h> -#include <linux/falloc.h> #include <asm/uaccess.h> #include "gfs2.h" @@ -1257,261 +1255,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) return ret; } -static void empty_write_end(struct page *page, unsigned from, - unsigned to) -{ - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - - page_zero_new_buffers(page, from, to); - flush_dcache_page(page); - mark_page_accessed(page); - - if (!gfs2_is_writeback(ip)) - gfs2_page_add_databufs(ip, page, from, to); - - block_commit_write(page, from, to); -} - - -static int write_empty_blocks(struct page *page, unsigned from, unsigned to) -{ - unsigned start, end, next; - struct buffer_head *bh, *head; - int error; - - if (!page_has_buffers(page)) { - error = __block_write_begin(page, from, to - from, gfs2_block_map); - if (unlikely(error)) - return error; - - empty_write_end(page, from, to); - return 0; - } - - bh = head = page_buffers(page); - next = end = 0; - while (next < from) { - next += bh->b_size; - bh = bh->b_this_page; - } - start = next; - do { - next += bh->b_size; - if (buffer_mapped(bh)) { - if (end) { - error = __block_write_begin(page, start, end - start, - gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); - end = 0; - } - start = next; - } - else - end = next; - bh = bh->b_this_page; - } while (next < to); - - if (end) { - error = __block_write_begin(page, start, end - start, gfs2_block_map); - if (unlikely(error)) - return error; - empty_write_end(page, start, end); - } - - return 0; -} - -static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, - int mode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct buffer_head *dibh; - int error; - u64 start = offset >> PAGE_CACHE_SHIFT; - unsigned int start_offset = offset & ~PAGE_CACHE_MASK; - u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; - pgoff_t curr; - struct page *page; - unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; - unsigned int from, to; - - if (!end_offset) - end_offset = PAGE_CACHE_SIZE; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (unlikely(error)) - goto out; - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL); - if (unlikely(error)) - goto out; - } - - curr = start; - offset = start << PAGE_CACHE_SHIFT; - from = start_offset; - to = PAGE_CACHE_SIZE; - while (curr <= end) { - page = grab_cache_page_write_begin(inode->i_mapping, curr, - AOP_FLAG_NOFS); - if (unlikely(!page)) { - error = -ENOMEM; - goto out; - } - - if (curr == end) - to = end_offset; - error = write_empty_blocks(page, from, to); - if (!error && offset + to > inode->i_size && - !(mode & FALLOC_FL_KEEP_SIZE)) { - i_size_write(inode, offset + to); - } - unlock_page(page); - page_cache_release(page); - if (error) - goto out; - curr++; - offset += PAGE_CACHE_SIZE; - from = 0; - } - - gfs2_dinode_out(ip, dibh->b_data); - mark_inode_dirty(inode); - - brelse(dibh); - -out: - return error; -} - -static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, - unsigned int *data_blocks, unsigned int *ind_blocks) -{ - const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; - unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); - - for (tmp = max_data; tmp > sdp->sd_diptrs;) { - tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); - max_data -= tmp; - } - /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, - so it might end up with fewer data blocks */ - if (max_data <= *data_blocks) - return; - *data_blocks = max_data; - *ind_blocks = max_blocks - max_data; - *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; - if (*len > max) { - *len = max; - gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); - } -} - -static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, - loff_t len) -{ - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_inode *ip = GFS2_I(inode); - unsigned int data_blocks = 0, ind_blocks = 0, rblocks; - loff_t bytes, max_bytes; - struct gfs2_alloc *al; - int error; - loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; - next = (next + 1) << sdp->sd_sb.sb_bsize_shift; - - /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode && (mode != FALLOC_FL_KEEP_SIZE)) - return -EOPNOTSUPP; - - offset = (offset >> sdp->sd_sb.sb_bsize_shift) << - sdp->sd_sb.sb_bsize_shift; - - len = next - offset; - bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; - if (!bytes) - bytes = UINT_MAX; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); - if (unlikely(error)) - goto out_uninit; - - if (!gfs2_write_alloc_required(ip, offset, len)) - goto out_unlock; - - while (len > 0) { - if (len < bytes) - bytes = len; - al = gfs2_alloc_get(ip); - if (!al) { - error = -ENOMEM; - goto out_unlock; - } - - error = gfs2_quota_lock_check(ip); - if (error) - goto out_alloc_put; - -retry: - gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - - al->al_requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip); - if (error) { - if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { - bytes >>= 1; - goto retry; - } - goto out_qunlock; - } - max_bytes = bytes; - calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); - al->al_requested = data_blocks + ind_blocks; - - rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(al); - if (gfs2_is_jdata(ip)) - rblocks += data_blocks ? data_blocks : 1; - - error = gfs2_trans_begin(sdp, rblocks, - PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); - if (error) - goto out_trans_fail; - - error = fallocate_chunk(inode, offset, max_bytes, mode); - gfs2_trans_end(sdp); - - if (error) - goto out_trans_fail; - - len -= max_bytes; - offset += max_bytes; - gfs2_inplace_release(ip); - gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); - } - goto out_unlock; - -out_trans_fail: - gfs2_inplace_release(ip); -out_qunlock: - gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_alloc_put(ip); -out_unlock: - gfs2_glock_dq(&ip->i_gh); -out_uninit: - gfs2_holder_uninit(&ip->i_gh); - return error; -} - - static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -1562,7 +1305,6 @@ const struct inode_operations gfs2_file_iops = { .getxattr = gfs2_getxattr, .listxattr = gfs2_listxattr, .removexattr = gfs2_removexattr, - .fallocate = gfs2_fallocate, .fiemap = gfs2_fiemap, }; diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 56f0da1cfd10..1ae35baa539e 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -281,7 +281,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_size != i_size_read(inode)) { error = vmtruncate(inode, attr->ia_size); if (error) - return error; + goto out_unlock; } setattr_copy(inode, attr); diff --git a/fs/internal.h b/fs/internal.h index 12ccb86edef7..0663568b1247 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -70,8 +70,7 @@ extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, extern void release_mounts(struct list_head *); extern void umount_tree(struct vfsmount *, int, struct list_head *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); -extern int do_add_mount(struct vfsmount *, struct path *, int); -extern void mnt_clear_expiry(struct vfsmount *); +extern int finish_automount(struct vfsmount *, struct path *); extern void mnt_make_longterm(struct vfsmount *); extern void mnt_make_shortterm(struct vfsmount *); diff --git a/fs/ioctl.c b/fs/ioctl.c index d6cc16476620..a59635e295fa 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -86,7 +86,7 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, u64 phys, u64 len, u32 flags) { struct fiemap_extent extent; - struct fiemap_extent *dest = fieinfo->fi_extents_start; + struct fiemap_extent __user *dest = fieinfo->fi_extents_start; /* only count the extents */ if (fieinfo->fi_extents_max == 0) { @@ -173,6 +173,7 @@ static int fiemap_check_ranges(struct super_block *sb, static int ioctl_fiemap(struct file *filp, unsigned long arg) { struct fiemap fiemap; + struct fiemap __user *ufiemap = (struct fiemap __user *) arg; struct fiemap_extent_info fieinfo = { 0, }; struct inode *inode = filp->f_path.dentry->d_inode; struct super_block *sb = inode->i_sb; @@ -182,8 +183,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) if (!inode->i_op->fiemap) return -EOPNOTSUPP; - if (copy_from_user(&fiemap, (struct fiemap __user *)arg, - sizeof(struct fiemap))) + if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap))) return -EFAULT; if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) @@ -196,7 +196,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) fieinfo.fi_flags = fiemap.fm_flags; fieinfo.fi_extents_max = fiemap.fm_extent_count; - fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); + fieinfo.fi_extents_start = ufiemap->fm_extents; if (fiemap.fm_extent_count != 0 && !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, @@ -209,7 +209,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); fiemap.fm_flags = fieinfo.fi_flags; fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; - if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) + if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) error = -EFAULT; return error; diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index 85c6be2db02f..3005ec4520ad 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -336,14 +336,13 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) size = sizeof(struct jffs2_eraseblock) * c->nr_blocks; #ifndef __ECOS if (jffs2_blocks_use_vmalloc(c)) - c->blocks = vmalloc(size); + c->blocks = vzalloc(size); else #endif - c->blocks = kmalloc(size, GFP_KERNEL); + c->blocks = kzalloc(size, GFP_KERNEL); if (!c->blocks) return -ENOMEM; - memset(c->blocks, 0, size); for (i=0; i<c->nr_blocks; i++) { INIT_LIST_HEAD(&c->blocks[i].list); c->blocks[i].offset = i * c->sector_size; diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index f864005de64c..0bc6a6c80a56 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -144,4 +144,4 @@ struct jffs2_sb_info { void *os_priv; }; -#endif /* _JFFS2_FB_SB */ +#endif /* _JFFS2_FS_SB */ diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 9b572ca40a49..4f9cc0482949 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -151,7 +151,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", offset, je32_to_cpu(rx.hdr_crc), crc); xd->flags |= JFFS2_XFLAGS_INVALID; - return EIO; + return -EIO; } totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len)); if (je16_to_cpu(rx.magic) != JFFS2_MAGIC_BITMASK @@ -167,7 +167,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat je32_to_cpu(rx.xid), xd->xid, je32_to_cpu(rx.version), xd->version); xd->flags |= JFFS2_XFLAGS_INVALID; - return EIO; + return -EIO; } xd->xprefix = rx.xprefix; xd->name_len = rx.name_len; @@ -230,7 +230,7 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum ref_offset(xd->node), xd->data_crc, crc); kfree(data); xd->flags |= JFFS2_XFLAGS_INVALID; - return EIO; + return -EIO; } xd->flags |= JFFS2_XFLAGS_HOT; @@ -268,7 +268,7 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x if (xd->xname) return 0; if (xd->flags & JFFS2_XFLAGS_INVALID) - return EIO; + return -EIO; if (unlikely(is_xattr_datum_unchecked(c, xd))) rc = do_verify_xattr_datum(c, xd); if (!rc) @@ -460,7 +460,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref if (crc != je32_to_cpu(rr.node_crc)) { JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", offset, je32_to_cpu(rr.node_crc), crc); - return EIO; + return -EIO; } if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK || je16_to_cpu(rr.nodetype) != JFFS2_NODETYPE_XREF @@ -470,7 +470,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK, je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF, je32_to_cpu(rr.totlen), PAD(sizeof(rr))); - return EIO; + return -EIO; } ref->ino = je32_to_cpu(rr.ino); ref->xid = je32_to_cpu(rr.xid); diff --git a/fs/namei.c b/fs/namei.c index 8f7b41a14882..b753192d8c3f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -923,37 +923,13 @@ static int follow_automount(struct path *path, unsigned flags, if (!mnt) /* mount collision */ return 0; - /* The new mount record should have at least 2 refs to prevent it being - * expired before we get a chance to add it - */ - BUG_ON(mnt_get_count(mnt) < 2); - - if (mnt->mnt_sb == path->mnt->mnt_sb && - mnt->mnt_root == path->dentry) { - mnt_clear_expiry(mnt); - mntput(mnt); - mntput(mnt); - return -ELOOP; - } + err = finish_automount(mnt, path); - /* We need to add the mountpoint to the parent. The filesystem may - * have placed it on an expiry list, and so we need to make sure it - * won't be expired under us if do_add_mount() fails (do_add_mount() - * will eat a reference unconditionally). - */ - mntget(mnt); - err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE); switch (err) { case -EBUSY: /* Someone else made a mount here whilst we were busy */ - err = 0; - default: - mnt_clear_expiry(mnt); - mntput(mnt); - mntput(mnt); - return err; + return 0; case 0: - mntput(mnt); dput(path->dentry); if (*need_mntput) mntput(path->mnt); @@ -961,7 +937,10 @@ static int follow_automount(struct path *path, unsigned flags, path->dentry = dget(mnt->mnt_root); *need_mntput = true; return 0; + default: + return err; } + } /* diff --git a/fs/namespace.c b/fs/namespace.c index 9f544f35ed34..7b0b95371696 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1872,6 +1872,8 @@ out: return err; } +static int do_add_mount(struct vfsmount *, struct path *, int); + /* * create a new mount for userspace and request it to be added into the * namespace's tree @@ -1880,6 +1882,7 @@ static int do_new_mount(struct path *path, char *type, int flags, int mnt_flags, char *name, void *data) { struct vfsmount *mnt; + int err; if (!type) return -EINVAL; @@ -1892,14 +1895,47 @@ static int do_new_mount(struct path *path, char *type, int flags, if (IS_ERR(mnt)) return PTR_ERR(mnt); - return do_add_mount(mnt, path, mnt_flags); + err = do_add_mount(mnt, path, mnt_flags); + if (err) + mntput(mnt); + return err; +} + +int finish_automount(struct vfsmount *m, struct path *path) +{ + int err; + /* The new mount record should have at least 2 refs to prevent it being + * expired before we get a chance to add it + */ + BUG_ON(mnt_get_count(m) < 2); + + if (m->mnt_sb == path->mnt->mnt_sb && + m->mnt_root == path->dentry) { + err = -ELOOP; + goto fail; + } + + err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE); + if (!err) + return 0; +fail: + /* remove m from any expiration list it may be on */ + if (!list_empty(&m->mnt_expire)) { + down_write(&namespace_sem); + br_write_lock(vfsmount_lock); + list_del_init(&m->mnt_expire); + br_write_unlock(vfsmount_lock); + up_write(&namespace_sem); + } + mntput(m); + mntput(m); + return err; } /* * add a mount into a namespace's mount tree - * - this unconditionally eats one of the caller's references to newmnt. */ -int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) +static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) { int err; @@ -1926,15 +1962,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) goto unlock; newmnt->mnt_flags = mnt_flags; - if ((err = graft_tree(newmnt, path))) - goto unlock; - - up_write(&namespace_sem); - return 0; + err = graft_tree(newmnt, path); unlock: up_write(&namespace_sem); - mntput(newmnt); return err; } @@ -1956,20 +1987,6 @@ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) EXPORT_SYMBOL(mnt_set_expiry); /* - * Remove a vfsmount from any expiration list it may be on - */ -void mnt_clear_expiry(struct vfsmount *mnt) -{ - if (!list_empty(&mnt->mnt_expire)) { - down_write(&namespace_sem); - br_write_lock(vfsmount_lock); - list_del_init(&mnt->mnt_expire); - br_write_unlock(vfsmount_lock); - up_write(&namespace_sem); - } -} - -/* * process a list of expirable mountpoints with the intent of discarding any * mountpoints that aren't in use and haven't been touched since last we came * here diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 63e3fca266e0..a6651956482e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1989,20 +1989,20 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); } -static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset, +static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { + struct inode *inode = file->f_path.dentry->d_inode; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_space_resv sr; int change_size = 1; int cmd = OCFS2_IOC_RESVSP64; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; if (!ocfs2_writes_unwritten_extents(osb)) return -EOPNOTSUPP; - if (S_ISDIR(inode->i_mode)) - return -ENODEV; - if (mode & FALLOC_FL_KEEP_SIZE) change_size = 0; @@ -2610,7 +2610,6 @@ const struct inode_operations ocfs2_file_iops = { .getxattr = generic_getxattr, .listxattr = ocfs2_listxattr, .removexattr = generic_removexattr, - .fallocate = ocfs2_fallocate, .fiemap = ocfs2_fiemap, }; @@ -2642,6 +2641,7 @@ const struct file_operations ocfs2_fops = { .flock = ocfs2_flock, .splice_read = ocfs2_file_splice_read, .splice_write = ocfs2_file_splice_write, + .fallocate = ocfs2_fallocate, }; const struct file_operations ocfs2_dops = { diff --git a/fs/open.c b/fs/open.c index 5b6ef7e2859e..e52389e1f05b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -255,10 +255,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) return -EFBIG; - if (!inode->i_op->fallocate) + if (!file->f_op->fallocate) return -EOPNOTSUPP; - return inode->i_op->fallocate(inode, mode, offset, len); + return file->f_op->fallocate(file, mode, offset, len); } SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ef51eb43e137..a55c1b46b219 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -37,6 +37,7 @@ #include "xfs_trace.h" #include <linux/dcache.h> +#include <linux/falloc.h> static const struct vm_operations_struct xfs_file_vm_ops; @@ -882,6 +883,60 @@ out_unlock: return ret; } +STATIC long +xfs_file_fallocate( + struct file *file, + int mode, + loff_t offset, + loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + long error; + loff_t new_size = 0; + xfs_flock64_t bf; + xfs_inode_t *ip = XFS_I(inode); + int cmd = XFS_IOC_RESVSP; + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + bf.l_whence = 0; + bf.l_start = offset; + bf.l_len = len; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + if (mode & FALLOC_FL_PUNCH_HOLE) + cmd = XFS_IOC_UNRESVSP; + + /* check the new inode size is valid before allocating */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; + error = inode_newsize_ok(inode, new_size); + if (error) + goto out_unlock; + } + + error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); + if (error) + goto out_unlock; + + /* Change file size if needed */ + if (new_size) { + struct iattr iattr; + + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = new_size; + error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); + } + +out_unlock: + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; +} + + STATIC int xfs_file_open( struct inode *inode, @@ -1000,6 +1055,7 @@ const struct file_operations xfs_file_operations = { .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, + .fallocate = xfs_file_fallocate, }; const struct file_operations xfs_dir_file_operations = { diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index da54403633b6..bd5727852fd6 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -46,7 +46,6 @@ #include <linux/namei.h> #include <linux/posix_acl.h> #include <linux/security.h> -#include <linux/falloc.h> #include <linux/fiemap.h> #include <linux/slab.h> @@ -505,61 +504,6 @@ xfs_vn_setattr( return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } -STATIC long -xfs_vn_fallocate( - struct inode *inode, - int mode, - loff_t offset, - loff_t len) -{ - long error; - loff_t new_size = 0; - xfs_flock64_t bf; - xfs_inode_t *ip = XFS_I(inode); - int cmd = XFS_IOC_RESVSP; - - /* preallocation on directories not yet supported */ - error = -ENODEV; - if (S_ISDIR(inode->i_mode)) - goto out_error; - - bf.l_whence = 0; - bf.l_start = offset; - bf.l_len = len; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - if (mode & FALLOC_FL_PUNCH_HOLE) - cmd = XFS_IOC_UNRESVSP; - - /* check the new inode size is valid before allocating */ - if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { - new_size = offset + len; - error = inode_newsize_ok(inode, new_size); - if (error) - goto out_unlock; - } - - error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); - if (error) - goto out_unlock; - - /* Change file size if needed */ - if (new_size) { - struct iattr iattr; - - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = new_size; - error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); - } - -out_unlock: - xfs_iunlock(ip, XFS_IOLOCK_EXCL); -out_error: - return error; -} - #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) /* @@ -653,7 +597,6 @@ static const struct inode_operations xfs_inode_operations = { .getxattr = generic_getxattr, .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, - .fallocate = xfs_vn_fallocate, .fiemap = xfs_vn_fiemap, }; diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index e6cf955ec0fc..0df88897ef84 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -75,11 +75,11 @@ xfs_cmn_err( { struct va_format vaf; va_list args; - int panic = 0; + int do_panic = 0; if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { printk(KERN_ALERT "XFS: Transforming an alert into a BUG."); - panic = 1; + do_panic = 1; } va_start(args, fmt); @@ -89,7 +89,7 @@ xfs_cmn_err( printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf); va_end(args); - BUG_ON(panic); + BUG_ON(do_panic); } void diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 521a0f8974ac..3111385b8ca7 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -12,7 +12,6 @@ * * Please credit ARM.com * Documentation: ARM DDI 0196D - * */ #ifndef AMBA_PL08X_H @@ -22,6 +21,15 @@ #include <linux/dmaengine.h> #include <linux/interrupt.h> +struct pl08x_lli; +struct pl08x_driver_data; + +/* Bitmasks for selecting AHB ports for DMA transfers */ +enum { + PL08X_AHB1 = (1 << 0), + PL08X_AHB2 = (1 << 1) +}; + /** * struct pl08x_channel_data - data structure to pass info between * platform and PL08x driver regarding channel configuration @@ -46,8 +54,10 @@ * @circular_buffer: whether the buffer passed in is circular and * shall simply be looped round round (like a record baby round * round round round) - * @single: the device connected to this channel will request single - * DMA transfers, not bursts. (Bursts are default.) + * @single: the device connected to this channel will request single DMA + * transfers, not bursts. (Bursts are default.) + * @periph_buses: the device connected to this channel is accessible via + * these buses (use PL08X_AHB1 | PL08X_AHB2). */ struct pl08x_channel_data { char *bus_id; @@ -55,10 +65,10 @@ struct pl08x_channel_data { int max_signal; u32 muxval; u32 cctl; - u32 ccfg; dma_addr_t addr; bool circular_buffer; bool single; + u8 periph_buses; }; /** @@ -67,24 +77,23 @@ struct pl08x_channel_data { * @addr: current address * @maxwidth: the maximum width of a transfer on this bus * @buswidth: the width of this bus in bytes: 1, 2 or 4 - * @fill_bytes: bytes required to fill to the next bus memory - * boundary + * @fill_bytes: bytes required to fill to the next bus memory boundary */ struct pl08x_bus_data { dma_addr_t addr; u8 maxwidth; u8 buswidth; - u32 fill_bytes; + size_t fill_bytes; }; /** * struct pl08x_phy_chan - holder for the physical channels * @id: physical index to this channel * @lock: a lock to use when altering an instance of this struct - * @signal: the physical signal (aka channel) serving this - * physical channel right now - * @serving: the virtual channel currently being served by this - * physical channel + * @signal: the physical signal (aka channel) serving this physical channel + * right now + * @serving: the virtual channel currently being served by this physical + * channel */ struct pl08x_phy_chan { unsigned int id; @@ -92,11 +101,6 @@ struct pl08x_phy_chan { spinlock_t lock; int signal; struct pl08x_dma_chan *serving; - u32 csrc; - u32 cdst; - u32 clli; - u32 cctl; - u32 ccfg; }; /** @@ -108,26 +112,23 @@ struct pl08x_txd { struct dma_async_tx_descriptor tx; struct list_head node; enum dma_data_direction direction; - struct pl08x_bus_data srcbus; - struct pl08x_bus_data dstbus; - int len; + dma_addr_t src_addr; + dma_addr_t dst_addr; + size_t len; dma_addr_t llis_bus; - void *llis_va; - struct pl08x_channel_data *cd; - bool active; + struct pl08x_lli *llis_va; + /* Default cctl value for LLIs */ + u32 cctl; /* * Settings to be put into the physical channel when we - * trigger this txd + * trigger this txd. Other registers are in llis_va[0]. */ - u32 csrc; - u32 cdst; - u32 clli; - u32 cctl; + u32 ccfg; }; /** - * struct pl08x_dma_chan_state - holds the PL08x specific virtual - * channel states + * struct pl08x_dma_chan_state - holds the PL08x specific virtual channel + * states * @PL08X_CHAN_IDLE: the channel is idle * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport * channel and is running a transfer on it @@ -147,6 +148,8 @@ enum pl08x_dma_chan_state { * struct pl08x_dma_chan - this structure wraps a DMA ENGINE channel * @chan: wrappped abstract channel * @phychan: the physical channel utilized by this channel, if there is one + * @phychan_hold: if non-zero, hold on to the physical channel even if we + * have no pending entries * @tasklet: tasklet scheduled by the IRQ to handle actual work etc * @name: name of channel * @cd: channel platform data @@ -154,53 +157,49 @@ enum pl08x_dma_chan_state { * @runtime_direction: current direction of this channel according to * runtime config * @lc: last completed transaction on this channel - * @desc_list: queued transactions pending on this channel + * @pend_list: queued transactions pending on this channel * @at: active transaction on this channel - * @lockflags: sometimes we let a lock last between two function calls, - * especially prep/submit, and then we need to store the IRQ flags - * in the channel state, here * @lock: a lock for this channel data * @host: a pointer to the host (internal use) * @state: whether the channel is idle, paused, running etc * @slave: whether this channel is a device (slave) or for memcpy - * @waiting: a TX descriptor on this channel which is waiting for - * a physical channel to become available + * @waiting: a TX descriptor on this channel which is waiting for a physical + * channel to become available */ struct pl08x_dma_chan { struct dma_chan chan; struct pl08x_phy_chan *phychan; + int phychan_hold; struct tasklet_struct tasklet; char *name; struct pl08x_channel_data *cd; dma_addr_t runtime_addr; enum dma_data_direction runtime_direction; - atomic_t last_issued; dma_cookie_t lc; - struct list_head desc_list; + struct list_head pend_list; struct pl08x_txd *at; - unsigned long lockflags; spinlock_t lock; - void *host; + struct pl08x_driver_data *host; enum pl08x_dma_chan_state state; bool slave; struct pl08x_txd *waiting; }; /** - * struct pl08x_platform_data - the platform configuration for the - * PL08x PrimeCells. + * struct pl08x_platform_data - the platform configuration for the PL08x + * PrimeCells. * @slave_channels: the channels defined for the different devices on the * platform, all inclusive, including multiplexed channels. The available - * physical channels will be multiplexed around these signals as they - * are requested, just enumerate all possible channels. - * @get_signal: request a physical signal to be used for a DMA - * transfer immediately: if there is some multiplexing or similar blocking - * the use of the channel the transfer can be denied by returning - * less than zero, else it returns the allocated signal number + * physical channels will be multiplexed around these signals as they are + * requested, just enumerate all possible channels. + * @get_signal: request a physical signal to be used for a DMA transfer + * immediately: if there is some multiplexing or similar blocking the use + * of the channel the transfer can be denied by returning less than zero, + * else it returns the allocated signal number * @put_signal: indicate to the platform that this physical signal is not * running any DMA transfer and multiplexing can be recycled - * @bus_bit_lli: Bit[0] of the address indicated which AHB bus master the - * LLI addresses are on 0/1 Master 1/2. + * @lli_buses: buses which LLIs can be fetched from: PL08X_AHB1 | PL08X_AHB2 + * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 */ struct pl08x_platform_data { struct pl08x_channel_data *slave_channels; @@ -208,6 +207,8 @@ struct pl08x_platform_data { struct pl08x_channel_data memcpy_channel; int (*get_signal)(struct pl08x_dma_chan *); void (*put_signal)(struct pl08x_dma_chan *); + u8 lli_buses; + u8 mem_buses; }; #ifdef CONFIG_AMBA_PL08X diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 8cd00ad98d37..9bebd7f16ef1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -532,7 +532,7 @@ static inline int dmaengine_resume(struct dma_chan *chan) return dmaengine_device_control(chan, DMA_RESUME, 0); } -static inline int dmaengine_submit(struct dma_async_tx_descriptor *desc) +static inline dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc) { return desc->tx_submit(desc); } diff --git a/include/linux/file.h b/include/linux/file.h index b1e12970f617..e85baebf6279 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -23,7 +23,7 @@ extern struct file *alloc_file(struct path *, fmode_t mode, static inline void fput_light(struct file *file, int fput_needed) { - if (unlikely(fput_needed)) + if (fput_needed) fput(file); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 177b4ddea418..32b38cd829d3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1483,8 +1483,8 @@ struct fiemap_extent_info { unsigned int fi_flags; /* Flags as passed from user */ unsigned int fi_extents_mapped; /* Number of mapped extents */ unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent - * array */ + struct fiemap_extent __user *fi_extents_start; /* Start of + fiemap_extent array */ }; int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, u64 phys, u64 len, u32 flags); @@ -1552,6 +1552,8 @@ struct file_operations { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int (*setlease)(struct file *, long, struct file_lock **); + long (*fallocate)(struct file *file, int mode, loff_t offset, + loff_t len); }; #define IPERM_FLAG_RCU 0x0001 @@ -1582,8 +1584,6 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); - long (*fallocate)(struct inode *inode, int mode, loff_t offset, - loff_t len); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); } ____cacheline_aligned; diff --git a/include/linux/magic.h b/include/linux/magic.h index ff690d05f129..62730ea2b56e 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -16,6 +16,7 @@ #define TMPFS_MAGIC 0x01021994 #define HUGETLBFS_MAGIC 0x958458f6 /* some random number */ #define SQUASHFS_MAGIC 0x73717368 +#define ECRYPTFS_SUPER_MAGIC 0xf15f #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a7b15bc7648e..049214642036 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -144,6 +144,11 @@ enum { MLX4_STAT_RATE_OFFSET = 5 }; +enum mlx4_protocol { + MLX4_PROTOCOL_IB, + MLX4_PROTOCOL_EN, +}; + enum { MLX4_MTT_FLAG_PRESENT = 1 }; @@ -500,8 +505,9 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - int block_mcast_loopback); -int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); + int block_mcast_loopback, enum mlx4_protocol protocol); +int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + enum mlx4_protocol protocol); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index); diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index f407cd4bfb34..e1eebf78caba 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -34,6 +34,7 @@ #define MLX4_DRIVER_H #include <linux/device.h> +#include <linux/mlx4/device.h> struct mlx4_dev; @@ -44,11 +45,6 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_PORT_REINIT, }; -enum mlx4_protocol { - MLX4_PROTOCOL_IB, - MLX4_PROTOCOL_EN, -}; - struct mlx4_interface { void * (*add) (struct mlx4_dev *dev); void (*remove)(struct mlx4_dev *dev, void *context); diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 4dd0c2cd7659..a9baee6864af 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -527,8 +527,7 @@ struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t s struct cfi_fixup { uint16_t mfr; uint16_t id; - void (*fixup)(struct mtd_info *mtd, void* param); - void* param; + void (*fixup)(struct mtd_info *mtd); }; #define CFI_MFR_ANY 0xFFFF diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h index 5d2556700ec2..6987995ad3cf 100644 --- a/include/linux/mtd/fsmc.h +++ b/include/linux/mtd/fsmc.h @@ -16,6 +16,7 @@ #ifndef __MTD_FSMC_H #define __MTD_FSMC_H +#include <linux/io.h> #include <linux/platform_device.h> #include <linux/mtd/physmap.h> #include <linux/types.h> @@ -27,7 +28,7 @@ /* * The placement of the Command Latch Enable (CLE) and - * Address Latch Enable (ALE) is twised around in the + * Address Latch Enable (ALE) is twisted around in the * SPEAR310 implementation. */ #if defined(CONFIG_MACH_SPEAR310) @@ -62,7 +63,7 @@ struct fsmc_nor_bank_regs { /* ctrl_tim register definitions */ -struct fsms_nand_bank_regs { +struct fsmc_nand_bank_regs { uint32_t pc; uint32_t sts; uint32_t comm; @@ -78,7 +79,7 @@ struct fsms_nand_bank_regs { struct fsmc_regs { struct fsmc_nor_bank_regs nor_bank_regs[FSMC_MAX_NOR_BANKS]; uint8_t reserved_1[0x40 - 0x20]; - struct fsms_nand_bank_regs bank_regs[FSMC_MAX_NAND_BANKS]; + struct fsmc_nand_bank_regs bank_regs[FSMC_MAX_NAND_BANKS]; uint8_t reserved_2[0xfe0 - 0xc0]; uint32_t peripid0; /* 0xfe0 */ uint32_t peripid1; /* 0xfe4 */ @@ -114,25 +115,6 @@ struct fsmc_regs { #define FSMC_THOLD_4 (4 << 16) #define FSMC_THIZ_1 (1 << 24) -/* peripid2 register definitions */ -#define FSMC_REVISION_MSK (0xf) -#define FSMC_REVISION_SHFT (0x4) - -#define FSMC_VER1 1 -#define FSMC_VER2 2 -#define FSMC_VER3 3 -#define FSMC_VER4 4 -#define FSMC_VER5 5 -#define FSMC_VER6 6 -#define FSMC_VER7 7 -#define FSMC_VER8 8 - -static inline uint32_t get_fsmc_version(struct fsmc_regs *regs) -{ - return (readl(®s->peripid2) >> FSMC_REVISION_SHFT) & - FSMC_REVISION_MSK; -} - /* * There are 13 bytes of ecc for every 512 byte block in FSMC version 8 * and it has to be read consecutively and immediately after the 512 diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index fe8d77ebec13..9d5306bad117 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -144,6 +144,17 @@ struct mtd_info { */ uint32_t writesize; + /* + * Size of the write buffer used by the MTD. MTD devices having a write + * buffer can write multiple writesize chunks at a time. E.g. while + * writing 4 * writesize bytes to a device with 2 * writesize bytes + * buffer the MTD driver can (but doesn't have to) do 2 writesize + * operations, but not 4. Currently, all NANDs have writebufsize + * equivalent to writesize (NAND page size). Some NOR flashes do have + * writebufsize greater than writesize. + */ + uint32_t writebufsize; + uint32_t oobsize; // Amount of OOB data per block (e.g. 16) uint32_t oobavail; // Available OOB bytes per block diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 63e17d01fde9..1f489b247a29 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -448,6 +448,8 @@ struct nand_buffers { * See the defines for further explanation. * @badblockpos: [INTERN] position of the bad block marker in the oob * area. + * @badblockbits: [INTERN] number of bits to left-shift the bad block + * number * @cellinfo: [INTERN] MLC/multichip data from chip ident * @numchips: [INTERN] number of physical chips * @chipsize: [INTERN] the size of one chip for multichip arrays diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h index 0c8815bfae1c..ae418e41d8f5 100644 --- a/include/linux/mtd/onenand.h +++ b/include/linux/mtd/onenand.h @@ -118,6 +118,8 @@ struct onenand_chip { int (*chip_probe)(struct mtd_info *mtd); int (*block_markbad)(struct mtd_info *mtd, loff_t ofs); int (*scan_bbt)(struct mtd_info *mtd); + int (*enable)(struct mtd_info *mtd); + int (*disable)(struct mtd_info *mtd); struct completion complete; int irq; @@ -137,6 +139,14 @@ struct onenand_chip { void *bbm; void *priv; + + /* + * Shows that the current operation is composed + * of sequence of commands. For example, cache program. + * Such command status OnGo bit is checked at the end of + * sequence. + */ + unsigned int ongoing; }; /* @@ -171,6 +181,9 @@ struct onenand_chip { #define ONENAND_IS_2PLANE(this) (0) #endif +#define ONENAND_IS_CACHE_PROGRAM(this) \ + (this->options & ONENAND_HAS_CACHE_PROGRAM) + /* Check byte access in OneNAND */ #define ONENAND_CHECK_BYTE_ACCESS(addr) (addr & 0x1) @@ -181,6 +194,7 @@ struct onenand_chip { #define ONENAND_HAS_UNLOCK_ALL (0x0002) #define ONENAND_HAS_2PLANE (0x0004) #define ONENAND_HAS_4KB_PAGE (0x0008) +#define ONENAND_HAS_CACHE_PROGRAM (0x0010) #define ONENAND_SKIP_UNLOCK_CHECK (0x0100) #define ONENAND_PAGEBUF_ALLOC (0x1000) #define ONENAND_OOBBUF_ALLOC (0x2000) diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 2b54316591d2..4a0a8ba90a72 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -89,7 +89,7 @@ static inline int mtd_has_cmdlinepart(void) { return 1; } static inline int mtd_has_cmdlinepart(void) { return 0; } #endif -int mtd_is_master(struct mtd_info *mtd); +int mtd_is_partition(struct mtd_info *mtd); int mtd_add_partition(struct mtd_info *master, char *name, long long offset, long long length); int mtd_del_partition(struct mtd_info *master, int partno); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 0ef22a1f129e..c84d900fbbb3 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -97,7 +97,7 @@ extern void early_init_dt_check_for_initrd(unsigned long node); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); extern void early_init_dt_add_memory_arch(u64 base, u64 size); -extern u64 early_init_dt_alloc_memory_arch(u64 size, u64 align); +extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); extern u64 dt_mem_next_cell(int s, __be32 **cellp); /* diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e04c4888d1fd..55cd0a0bc977 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -47,10 +47,13 @@ #include <linux/list.h> #include <linux/rwsem.h> #include <linux/scatterlist.h> +#include <linux/workqueue.h> #include <asm/atomic.h> #include <asm/uaccess.h> +extern struct workqueue_struct *ib_wq; + union ib_gid { u8 raw[16]; struct { diff --git a/mm/internal.h b/mm/internal.h index 4c98630f0f77..69488205723d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -39,15 +39,6 @@ static inline void __put_page(struct page *page) extern unsigned long highest_memmap_pfn; -#ifdef CONFIG_SMP -extern int putback_active_lru_page(struct zone *zone, struct page *page); -#else -static inline int putback_active_lru_page(struct zone *zone, struct page *page) -{ - return 0; -} -#endif - /* * in mm/vmscan.c: */ diff --git a/mm/swap.c b/mm/swap.c index bbc1ce9f9460..c02f93611a84 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -178,13 +178,15 @@ void put_pages_list(struct list_head *pages) } EXPORT_SYMBOL(put_pages_list); -static void pagevec_lru_move_fn(struct pagevec *pvec, - void (*move_fn)(struct page *page, void *arg), - void *arg) +/* + * pagevec_move_tail() must be called with IRQ disabled. + * Otherwise this may cause nasty races. + */ +static void pagevec_move_tail(struct pagevec *pvec) { int i; + int pgmoved = 0; struct zone *zone = NULL; - unsigned long flags = 0; for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; @@ -192,41 +194,21 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, if (pagezone != zone) { if (zone) - spin_unlock_irqrestore(&zone->lru_lock, flags); + spin_unlock(&zone->lru_lock); zone = pagezone; - spin_lock_irqsave(&zone->lru_lock, flags); + spin_lock(&zone->lru_lock); + } + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { + int lru = page_lru_base_type(page); + list_move_tail(&page->lru, &zone->lru[lru].list); + pgmoved++; } - - (*move_fn)(page, arg); } if (zone) - spin_unlock_irqrestore(&zone->lru_lock, flags); - release_pages(pvec->pages, pagevec_count(pvec), pvec->cold); - pagevec_reinit(pvec); -} - -static void pagevec_move_tail_fn(struct page *page, void *arg) -{ - int *pgmoved = arg; - struct zone *zone = page_zone(page); - - if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { - int lru = page_lru_base_type(page); - list_move_tail(&page->lru, &zone->lru[lru].list); - (*pgmoved)++; - } -} - -/* - * pagevec_move_tail() must be called with IRQ disabled. - * Otherwise this may cause nasty races. - */ -static void pagevec_move_tail(struct pagevec *pvec) -{ - int pgmoved = 0; - - pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved); + spin_unlock(&zone->lru_lock); __count_vm_events(PGROTATED, pgmoved); + release_pages(pvec->pages, pvec->nr, pvec->cold); + pagevec_reinit(pvec); } /* @@ -234,7 +216,7 @@ static void pagevec_move_tail(struct pagevec *pvec) * reclaim. If it still appears to be reclaimable, move it to the tail of the * inactive list. */ -void rotate_reclaimable_page(struct page *page) +void rotate_reclaimable_page(struct page *page) { if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) && !PageUnevictable(page) && PageLRU(page)) { @@ -271,94 +253,27 @@ static void update_page_reclaim_stat(struct zone *zone, struct page *page, } /* - * A page will go to active list either by activate_page or putback_lru_page. - * In the activate_page case, the page hasn't active bit set. The page might - * not in LRU list because it's isolated before it gets a chance to be moved to - * active list. The window is small because pagevec just stores several pages. - * For such case, we do nothing for such page. - * In the putback_lru_page case, the page isn't in lru list but has active - * bit set + * FIXME: speed this up? */ -static void __activate_page(struct page *page, void *arg) +void activate_page(struct page *page) { struct zone *zone = page_zone(page); - int file = page_is_file_cache(page); - int lru = page_lru_base_type(page); - bool putback = !PageLRU(page); - - /* The page is isolated before it's moved to active list */ - if (!PageLRU(page) && !PageActive(page)) - return; - if ((PageLRU(page) && PageActive(page)) || PageUnevictable(page)) - return; - - if (!putback) - del_page_from_lru_list(zone, page, lru); - else - SetPageLRU(page); - - SetPageActive(page); - lru += LRU_ACTIVE; - add_page_to_lru_list(zone, page, lru); - - if (putback) - return; - __count_vm_event(PGACTIVATE); - update_page_reclaim_stat(zone, page, file, 1); -} -#ifdef CONFIG_SMP -static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); - -static void activate_page_drain(int cpu) -{ - struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu); - - if (pagevec_count(pvec)) - pagevec_lru_move_fn(pvec, __activate_page, NULL); -} - -void activate_page(struct page *page) -{ + spin_lock_irq(&zone->lru_lock); if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); - - page_cache_get(page); - if (!pagevec_add(pvec, page)) - pagevec_lru_move_fn(pvec, __activate_page, NULL); - put_cpu_var(activate_page_pvecs); - } -} + int file = page_is_file_cache(page); + int lru = page_lru_base_type(page); + del_page_from_lru_list(zone, page, lru); -/* Caller should hold zone->lru_lock */ -int putback_active_lru_page(struct zone *zone, struct page *page) -{ - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); + SetPageActive(page); + lru += LRU_ACTIVE; + add_page_to_lru_list(zone, page, lru); + __count_vm_event(PGACTIVATE); - if (!pagevec_add(pvec, page)) { - spin_unlock_irq(&zone->lru_lock); - pagevec_lru_move_fn(pvec, __activate_page, NULL); - spin_lock_irq(&zone->lru_lock); + update_page_reclaim_stat(zone, page, file, 1); } - put_cpu_var(activate_page_pvecs); - return 1; -} - -#else -static inline void activate_page_drain(int cpu) -{ -} - -void activate_page(struct page *page) -{ - struct zone *zone = page_zone(page); - - spin_lock_irq(&zone->lru_lock); - if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) - __activate_page(page, NULL); spin_unlock_irq(&zone->lru_lock); } -#endif /* * Mark a page as having seen activity. @@ -457,7 +372,6 @@ static void drain_cpu_pagevecs(int cpu) pagevec_move_tail(pvec); local_irq_restore(flags); } - activate_page_drain(cpu); } void lru_add_drain(void) @@ -602,33 +516,44 @@ void lru_add_page_tail(struct zone* zone, } } -static void ____pagevec_lru_add_fn(struct page *page, void *arg) -{ - enum lru_list lru = (enum lru_list)arg; - struct zone *zone = page_zone(page); - int file = is_file_lru(lru); - int active = is_active_lru(lru); - - VM_BUG_ON(PageActive(page)); - VM_BUG_ON(PageUnevictable(page)); - VM_BUG_ON(PageLRU(page)); - - SetPageLRU(page); - if (active) - SetPageActive(page); - update_page_reclaim_stat(zone, page, file, active); - add_page_to_lru_list(zone, page, lru); -} - /* * Add the passed pages to the LRU, then drop the caller's refcount * on them. Reinitialises the caller's pagevec. */ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) { + int i; + struct zone *zone = NULL; + VM_BUG_ON(is_unevictable_lru(lru)); - pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru); + for (i = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + struct zone *pagezone = page_zone(page); + int file; + int active; + + if (pagezone != zone) { + if (zone) + spin_unlock_irq(&zone->lru_lock); + zone = pagezone; + spin_lock_irq(&zone->lru_lock); + } + VM_BUG_ON(PageActive(page)); + VM_BUG_ON(PageUnevictable(page)); + VM_BUG_ON(PageLRU(page)); + SetPageLRU(page); + active = is_active_lru(lru); + file = is_file_lru(lru); + if (active) + SetPageActive(page); + update_page_reclaim_stat(zone, page, file, active); + add_page_to_lru_list(zone, page, lru); + } + if (zone) + spin_unlock_irq(&zone->lru_lock); + release_pages(pvec->pages, pvec->nr, pvec->cold); + pagevec_reinit(pvec); } EXPORT_SYMBOL(____pagevec_lru_add); diff --git a/mm/vmscan.c b/mm/vmscan.c index 99999a9b2b0b..47a50962ce81 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1271,16 +1271,14 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc, spin_lock_irq(&zone->lru_lock); continue; } + SetPageLRU(page); lru = page_lru(page); + add_page_to_lru_list(zone, page, lru); if (is_active_lru(lru)) { int file = is_file_lru(lru); int numpages = hpage_nr_pages(page); reclaim_stat->recent_rotated[file] += numpages; - if (putback_active_lru_page(zone, page)) - continue; } - SetPageLRU(page); - add_page_to_lru_list(zone, page, lru); if (!pagevec_add(&pvec, page)) { spin_unlock_irq(&zone->lru_lock); __pagevec_release(&pvec); |