diff options
104 files changed, 1031 insertions, 485 deletions
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index a6d32e65d222..a8536cb88091 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt @@ -34,7 +34,7 @@ NMI handler. cpu = smp_processor_id(); ++nmi_count(cpu); - if (!rcu_dereference(nmi_callback)(regs, cpu)) + if (!rcu_dereference_sched(nmi_callback)(regs, cpu)) default_do_nmi(regs); nmi_exit(); @@ -47,12 +47,13 @@ function pointer. If this handler returns zero, do_nmi() invokes the default_do_nmi() function to handle a machine-specific NMI. Finally, preemption is restored. -Strictly speaking, rcu_dereference() is not needed, since this code runs -only on i386, which does not need rcu_dereference() anyway. However, -it is a good documentation aid, particularly for anyone attempting to -do something similar on Alpha. +In theory, rcu_dereference_sched() is not needed, since this code runs +only on i386, which in theory does not need rcu_dereference_sched() +anyway. However, in practice it is a good documentation aid, particularly +for anyone attempting to do something similar on Alpha or on systems +with aggressive optimizing compilers. -Quick Quiz: Why might the rcu_dereference() be necessary on Alpha, +Quick Quiz: Why might the rcu_dereference_sched() be necessary on Alpha, given that the code referenced by the pointer is read-only? @@ -99,17 +100,21 @@ invoke irq_enter() and irq_exit() on NMI entry and exit, respectively. Answer to Quick Quiz - Why might the rcu_dereference() be necessary on Alpha, given + Why might the rcu_dereference_sched() be necessary on Alpha, given that the code referenced by the pointer is read-only? Answer: The caller to set_nmi_callback() might well have - initialized some data that is to be used by the - new NMI handler. In this case, the rcu_dereference() - would be needed, because otherwise a CPU that received - an NMI just after the new handler was set might see - the pointer to the new NMI handler, but the old - pre-initialized version of the handler's data. - - More important, the rcu_dereference() makes it clear - to someone reading the code that the pointer is being - protected by RCU. + initialized some data that is to be used by the new NMI + handler. In this case, the rcu_dereference_sched() would + be needed, because otherwise a CPU that received an NMI + just after the new handler was set might see the pointer + to the new NMI handler, but the old pre-initialized + version of the handler's data. + + This same sad story can happen on other CPUs when using + a compiler with aggressive pointer-value speculation + optimizations. + + More important, the rcu_dereference_sched() makes it + clear to someone reading the code that the pointer is + being protected by RCU-sched. diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index cbc180f90194..790d1a812376 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -260,7 +260,8 @@ over a rather long period of time, but improvements are always welcome! The reason that it is permissible to use RCU list-traversal primitives when the update-side lock is held is that doing so can be quite helpful in reducing code bloat when common code is - shared between readers and updaters. + shared between readers and updaters. Additional primitives + are provided for this case, as discussed in lockdep.txt. 10. Conversely, if you are in an RCU read-side critical section, and you don't hold the appropriate update-side lock, you -must- @@ -344,8 +345,8 @@ over a rather long period of time, but improvements are always welcome! requiring SRCU's read-side deadlock immunity or low read-side realtime latency. - Note that, rcu_assign_pointer() and rcu_dereference() relate to - SRCU just as they do to other forms of RCU. + Note that, rcu_assign_pointer() relates to SRCU just as they do + to other forms of RCU. 15. The whole point of call_rcu(), synchronize_rcu(), and friends is to wait until all pre-existing readers have finished before diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt index fe24b58627bd..d7a49b2f6994 100644 --- a/Documentation/RCU/lockdep.txt +++ b/Documentation/RCU/lockdep.txt @@ -32,9 +32,20 @@ checking of rcu_dereference() primitives: srcu_dereference(p, sp): Check for SRCU read-side critical section. rcu_dereference_check(p, c): - Use explicit check expression "c". + Use explicit check expression "c". This is useful in + code that is invoked by both readers and updaters. rcu_dereference_raw(p) Don't check. (Use sparingly, if at all.) + rcu_dereference_protected(p, c): + Use explicit check expression "c", and omit all barriers + and compiler constraints. This is useful when the data + structure cannot change, for example, in code that is + invoked only by updaters. + rcu_access_pointer(p): + Return the value of the pointer and omit all barriers, + but retain the compiler constraints that prevent duplicating + or coalescsing. This is useful when when testing the + value of the pointer itself, for example, against NULL. The rcu_dereference_check() check expression can be any boolean expression, but would normally include one of the rcu_read_lock_held() @@ -59,7 +70,20 @@ In case (1), the pointer is picked up in an RCU-safe manner for vanilla RCU read-side critical sections, in case (2) the ->file_lock prevents any change from taking place, and finally, in case (3) the current task is the only task accessing the file_struct, again preventing any change -from taking place. +from taking place. If the above statement was invoked only from updater +code, it could instead be written as follows: + + file = rcu_dereference_protected(fdt->fd[fd], + lockdep_is_held(&files->file_lock) || + atomic_read(&files->count) == 1); + +This would verify cases #2 and #3 above, and furthermore lockdep would +complain if this was used in an RCU read-side critical section unless one +of these two cases held. Because rcu_dereference_protected() omits all +barriers and compiler constraints, it generates better code than do the +other flavors of rcu_dereference(). On the other hand, it is illegal +to use rcu_dereference_protected() if either the RCU-protected pointer +or the RCU-protected data that it points to can change concurrently. There are currently only "universal" versions of the rcu_assign_pointer() and RCU list-/tree-traversal primitives, which do not (yet) check for diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 1dc00ee97163..cfaac34c4557 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -840,6 +840,12 @@ SRCU: Initialization/cleanup init_srcu_struct cleanup_srcu_struct +All: lockdep-checked RCU-protected pointer access + + rcu_dereference_check + rcu_dereference_protected + rcu_access_pointer + See the comment headers in the source code (or the docbook generated from them) for more information. @@ -1,8 +1,8 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 34 -EXTRAVERSION = -rc4 -NAME = Man-Eating Seals of Antiquity +EXTRAVERSION = -rc5 +NAME = Sheep on Meth # *DOCUMENTATION* # To see a list of typical targets execute "make help" diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 7f36d00600b4..feb988a7ec37 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -11,7 +11,11 @@ #define kmap_prot PAGE_KERNEL -#define flush_cache_kmaps() flush_cache_all() +#define flush_cache_kmaps() \ + do { \ + if (cache_is_vivt()) \ + flush_cache_all(); \ + } while (0) extern pte_t *pkmap_page_table; @@ -21,11 +25,20 @@ extern void *kmap_high(struct page *page); extern void *kmap_high_get(struct page *page); extern void kunmap_high(struct page *page); +extern void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte); +extern void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte); + +/* + * The following functions are already defined by <linux/highmem.h> + * when CONFIG_HIGHMEM is not set. + */ +#ifdef CONFIG_HIGHMEM extern void *kmap(struct page *page); extern void kunmap(struct page *page); extern void *kmap_atomic(struct page *page, enum km_type type); extern void kunmap_atomic(void *kvaddr, enum km_type type); extern void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); extern struct page *kmap_atomic_to_page(const void *ptr); +#endif #endif diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h index c019949a5189..c4b2ea3fbe42 100644 --- a/arch/arm/include/asm/kmap_types.h +++ b/arch/arm/include/asm/kmap_types.h @@ -18,6 +18,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_L1_CACHE, KM_L2_CACHE, KM_TYPE_NR }; diff --git a/arch/arm/include/asm/ucontext.h b/arch/arm/include/asm/ucontext.h index bf65e9f4525d..47f023aa8495 100644 --- a/arch/arm/include/asm/ucontext.h +++ b/arch/arm/include/asm/ucontext.h @@ -59,23 +59,22 @@ struct iwmmxt_sigframe { #endif /* CONFIG_IWMMXT */ #ifdef CONFIG_VFP -#if __LINUX_ARM_ARCH__ < 6 -/* For ARM pre-v6, we use fstmiax and fldmiax. This adds one extra - * word after the registers, and a word of padding at the end for - * alignment. */ #define VFP_MAGIC 0x56465001 -#define VFP_STORAGE_SIZE 152 -#else -#define VFP_MAGIC 0x56465002 -#define VFP_STORAGE_SIZE 144 -#endif struct vfp_sigframe { unsigned long magic; unsigned long size; - union vfp_state storage; -}; + struct user_vfp ufp; + struct user_vfp_exc ufp_exc; +} __attribute__((__aligned__(8))); + +/* + * 8 byte for magic and size, 264 byte for ufp, 12 bytes for ufp_exc, + * 4 bytes padding. + */ +#define VFP_STORAGE_SIZE sizeof(struct vfp_sigframe) + #endif /* CONFIG_VFP */ /* @@ -91,7 +90,7 @@ struct aux_sigframe { #ifdef CONFIG_IWMMXT struct iwmmxt_sigframe iwmmxt; #endif -#if 0 && defined CONFIG_VFP /* Not yet saved. */ +#ifdef CONFIG_VFP struct vfp_sigframe vfp; #endif /* Something that isn't a valid magic number for any coprocessor. */ diff --git a/arch/arm/include/asm/user.h b/arch/arm/include/asm/user.h index df95e050f9dd..05ac4b06876a 100644 --- a/arch/arm/include/asm/user.h +++ b/arch/arm/include/asm/user.h @@ -83,11 +83,21 @@ struct user{ /* * User specific VFP registers. If only VFPv2 is present, registers 16 to 31 - * are ignored by the ptrace system call. + * are ignored by the ptrace system call and the signal handler. */ struct user_vfp { unsigned long long fpregs[32]; unsigned long fpscr; }; +/* + * VFP exception registers exposed to user space during signal delivery. + * Fields not relavant to the current VFP architecture are ignored. + */ +struct user_vfp_exc { + unsigned long fpexc; + unsigned long fpinst; + unsigned long fpinst2; +}; + #endif /* _ARM_USER_H */ diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index e7714f367eb8..907d5a620bca 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -18,6 +18,7 @@ #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/unistd.h> +#include <asm/vfp.h> #include "ptrace.h" #include "signal.h" @@ -175,6 +176,90 @@ static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame) #endif +#ifdef CONFIG_VFP + +static int preserve_vfp_context(struct vfp_sigframe __user *frame) +{ + struct thread_info *thread = current_thread_info(); + struct vfp_hard_struct *h = &thread->vfpstate.hard; + const unsigned long magic = VFP_MAGIC; + const unsigned long size = VFP_STORAGE_SIZE; + int err = 0; + + vfp_sync_hwstate(thread); + __put_user_error(magic, &frame->magic, err); + __put_user_error(size, &frame->size, err); + + /* + * Copy the floating point registers. There can be unused + * registers see asm/hwcap.h for details. + */ + err |= __copy_to_user(&frame->ufp.fpregs, &h->fpregs, + sizeof(h->fpregs)); + /* + * Copy the status and control register. + */ + __put_user_error(h->fpscr, &frame->ufp.fpscr, err); + + /* + * Copy the exception registers. + */ + __put_user_error(h->fpexc, &frame->ufp_exc.fpexc, err); + __put_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); + __put_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); + + return err ? -EFAULT : 0; +} + +static int restore_vfp_context(struct vfp_sigframe __user *frame) +{ + struct thread_info *thread = current_thread_info(); + struct vfp_hard_struct *h = &thread->vfpstate.hard; + unsigned long magic; + unsigned long size; + unsigned long fpexc; + int err = 0; + + __get_user_error(magic, &frame->magic, err); + __get_user_error(size, &frame->size, err); + + if (err) + return -EFAULT; + if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) + return -EINVAL; + + /* + * Copy the floating point registers. There can be unused + * registers see asm/hwcap.h for details. + */ + err |= __copy_from_user(&h->fpregs, &frame->ufp.fpregs, + sizeof(h->fpregs)); + /* + * Copy the status and control register. + */ + __get_user_error(h->fpscr, &frame->ufp.fpscr, err); + + /* + * Sanitise and restore the exception registers. + */ + __get_user_error(fpexc, &frame->ufp_exc.fpexc, err); + /* Ensure the VFP is enabled. */ + fpexc |= FPEXC_EN; + /* Ensure FPINST2 is invalid and the exception flag is cleared. */ + fpexc &= ~(FPEXC_EX | FPEXC_FP2V); + h->fpexc = fpexc; + + __get_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); + __get_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); + + if (!err) + vfp_flush_hwstate(thread); + + return err ? -EFAULT : 0; +} + +#endif + /* * Do a signal return; undo the signal stack. These are aligned to 64-bit. */ @@ -233,8 +318,8 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) err |= restore_iwmmxt_context(&aux->iwmmxt); #endif #ifdef CONFIG_VFP -// if (err == 0) -// err |= vfp_restore_state(&sf->aux.vfp); + if (err == 0) + err |= restore_vfp_context(&aux->vfp); #endif return err; @@ -348,8 +433,8 @@ setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) err |= preserve_iwmmxt_context(&aux->iwmmxt); #endif #ifdef CONFIG_VFP -// if (err == 0) -// err |= vfp_save_state(&sf->aux.vfp); + if (err == 0) + err |= preserve_vfp_context(&aux->vfp); #endif __put_user_error(0, &aux->end_magic, err); diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index 027dd570dcc3..d4004557532a 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -16,8 +16,8 @@ obj-$(CONFIG_ARCH_AT91SAM9261) += at91sam9261.o at91sam926x_time.o at91sam9261_d obj-$(CONFIG_ARCH_AT91SAM9G10) += at91sam9261.o at91sam926x_time.o at91sam9261_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91SAM9263) += at91sam9263.o at91sam926x_time.o at91sam9263_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91SAM9RL) += at91sam9rl.o at91sam926x_time.o at91sam9rl_devices.o sam9_smc.o -obj-$(CONFIG_ARCH_AT91SAM9G20) += at91sam9260.o at91sam926x_time.o at91sam9260_devices.o sam9_smc.o - obj-$(CONFIG_ARCH_AT91SAM9G45) += at91sam9g45.o at91sam926x_time.o at91sam9g45_devices.o sam9_smc.o +obj-$(CONFIG_ARCH_AT91SAM9G20) += at91sam9260.o at91sam926x_time.o at91sam9260_devices.o sam9_smc.o +obj-$(CONFIG_ARCH_AT91SAM9G45) += at91sam9g45.o at91sam926x_time.o at91sam9g45_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91CAP9) += at91cap9.o at91sam926x_time.o at91cap9_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT572D940HF) += at572d940hf.o at91sam926x_time.o at572d940hf_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91X40) += at91x40.o at91x40_time.o diff --git a/arch/arm/mach-at91/pm_slowclock.S b/arch/arm/mach-at91/pm_slowclock.S index 9fcbd6ca0090..9c5b48e68a71 100644 --- a/arch/arm/mach-at91/pm_slowclock.S +++ b/arch/arm/mach-at91/pm_slowclock.S @@ -175,8 +175,6 @@ ENTRY(at91_slow_clock) orr r3, r3, #(1 << 29) /* bit 29 always set */ str r3, [r1, #(AT91_CKGR_PLLAR - AT91_PMC)] - wait_pllalock - /* Save PLLB setting and disable it */ ldr r3, [r1, #(AT91_CKGR_PLLBR - AT91_PMC)] str r3, .saved_pllbr @@ -184,8 +182,6 @@ ENTRY(at91_slow_clock) mov r3, #AT91_PMC_PLLCOUNT str r3, [r1, #(AT91_CKGR_PLLBR - AT91_PMC)] - wait_pllblock - /* Turn off the main oscillator */ ldr r3, [r1, #(AT91_CKGR_MOR - AT91_PMC)] bic r3, r3, #AT91_PMC_MOSCEN diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 8bca4dea6dfa..f55fa1044f72 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -41,14 +41,7 @@ static void v6_copy_user_highpage_nonaliasing(struct page *to, kfrom = kmap_atomic(from, KM_USER0); kto = kmap_atomic(to, KM_USER1); copy_page(kto, kfrom); -#ifdef CONFIG_HIGHMEM - /* - * kmap_atomic() doesn't set the page virtual address, and - * kunmap_atomic() takes care of cache flushing already. - */ - if (page_address(to) != NULL) -#endif - __cpuc_flush_dcache_area(kto, PAGE_SIZE); + __cpuc_flush_dcache_area(kto, PAGE_SIZE); kunmap_atomic(kto, KM_USER1); kunmap_atomic(kfrom, KM_USER0); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1351edc0b26f..13fa536d82e6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -464,6 +464,11 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, vaddr += offset; op(vaddr, len, dir); kunmap_high(page); + } else if (cache_is_vipt()) { + pte_t saved_pte; + vaddr = kmap_high_l1_vipt(page, &saved_pte); + op(vaddr + offset, len, dir); + kunmap_high_l1_vipt(page, saved_pte); } } else { vaddr = page_address(page) + offset; diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index e34f095e2090..c6844cb9b508 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -13,6 +13,7 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> +#include <asm/highmem.h> #include <asm/smp_plat.h> #include <asm/system.h> #include <asm/tlbflush.h> @@ -152,21 +153,25 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, void __flush_dcache_page(struct address_space *mapping, struct page *page) { - void *addr = page_address(page); - /* * Writeback any data associated with the kernel mapping of this * page. This ensures that data in the physical page is mutually * coherent with the kernels mapping. */ -#ifdef CONFIG_HIGHMEM - /* - * kmap_atomic() doesn't set the page virtual address, and - * kunmap_atomic() takes care of cache flushing already. - */ - if (addr) -#endif - __cpuc_flush_dcache_area(addr, PAGE_SIZE); + if (!PageHighMem(page)) { + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + } else { + void *addr = kmap_high_get(page); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page); + } else if (cache_is_vipt()) { + pte_t saved_pte; + addr = kmap_high_l1_vipt(page, &saved_pte); + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high_l1_vipt(page, saved_pte); + } + } /* * If this is a page cache page, and we have an aliasing VIPT cache, diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index 2be1ec7c1b41..77b030f5ec09 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -79,7 +79,8 @@ void kunmap_atomic(void *kvaddr, enum km_type type) unsigned int idx = type + KM_TYPE_NR * smp_processor_id(); if (kvaddr >= (void *)FIXADDR_START) { - __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); + if (cache_is_vivt()) + __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); set_pte_ext(TOP_PTE(vaddr), __pte(0), 0); @@ -124,3 +125,87 @@ struct page *kmap_atomic_to_page(const void *ptr) pte = TOP_PTE(vaddr); return pte_page(*pte); } + +#ifdef CONFIG_CPU_CACHE_VIPT + +#include <linux/percpu.h> + +/* + * The VIVT cache of a highmem page is always flushed before the page + * is unmapped. Hence unmapped highmem pages need no cache maintenance + * in that case. + * + * However unmapped pages may still be cached with a VIPT cache, and + * it is not possible to perform cache maintenance on them using physical + * addresses unfortunately. So we have no choice but to set up a temporary + * virtual mapping for that purpose. + * + * Yet this VIPT cache maintenance may be triggered from DMA support + * functions which are possibly called from interrupt context. As we don't + * want to keep interrupt disabled all the time when such maintenance is + * taking place, we therefore allow for some reentrancy by preserving and + * restoring the previous fixmap entry before the interrupted context is + * resumed. If the reentrancy depth is 0 then there is no need to restore + * the previous fixmap, and leaving the current one in place allow it to + * be reused the next time without a TLB flush (common with DMA). + */ + +static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth); + +void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte) +{ + unsigned int idx, cpu = smp_processor_id(); + int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + unsigned long vaddr, flags; + pte_t pte, *ptep; + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + if (!in_interrupt()) + preempt_disable(); + + raw_local_irq_save(flags); + (*depth)++; + if (pte_val(*ptep) == pte_val(pte)) { + *saved_pte = pte; + } else { + *saved_pte = *ptep; + set_pte_ext(ptep, pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + return (void *)vaddr; +} + +void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte) +{ + unsigned int idx, cpu = smp_processor_id(); + int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + unsigned long vaddr, flags; + pte_t pte, *ptep; + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + BUG_ON(pte_val(*ptep) != pte_val(pte)); + BUG_ON(*depth <= 0); + + raw_local_irq_save(flags); + (*depth)--; + if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) { + set_pte_ext(ptep, saved_pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + if (!in_interrupt()) + preempt_enable(); +} + +#endif /* CONFIG_CPU_CACHE_VIPT */ diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4223d086aa17..241c24a1c18f 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1054,10 +1054,12 @@ void setup_mm_for_reboot(char mode) pgd_t *pgd; int i; - if (current->mm && current->mm->pgd) - pgd = current->mm->pgd; - else - pgd = init_mm.pgd; + /* + * We need to access to user-mode page tables here. For kernel threads + * we don't have any user-mode mappings so we use the context that we + * "borrowed". + */ + pgd = current->active_mm->pgd; base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT; if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index a420cb949328..315a540c7ce5 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -428,26 +428,6 @@ static void vfp_pm_init(void) static inline void vfp_pm_init(void) { } #endif /* CONFIG_PM */ -/* - * Synchronise the hardware VFP state of a thread other than current with the - * saved one. This function is used by the ptrace mechanism. - */ -#ifdef CONFIG_SMP -void vfp_sync_hwstate(struct thread_info *thread) -{ -} - -void vfp_flush_hwstate(struct thread_info *thread) -{ - /* - * On SMP systems, the VFP state is automatically saved at every - * context switch. We mark the thread VFP state as belonging to a - * non-existent CPU so that the saved one will be reloaded when - * needed. - */ - thread->vfpstate.hard.cpu = NR_CPUS; -} -#else void vfp_sync_hwstate(struct thread_info *thread) { unsigned int cpu = get_cpu(); @@ -490,9 +470,18 @@ void vfp_flush_hwstate(struct thread_info *thread) last_VFP_context[cpu] = NULL; } +#ifdef CONFIG_SMP + /* + * For SMP we still have to take care of the case where the thread + * migrates to another CPU and then back to the original CPU on which + * the last VFP user is still the same thread. Mark the thread VFP + * state as belonging to a non-existent CPU so that the saved one will + * be reloaded in the above case. + */ + thread->vfpstate.hard.cpu = NR_CPUS; +#endif put_cpu(); } -#endif #include <linux/smp.h> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 73c5c2b05f64..7f3c0a2e60cd 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1802,7 +1802,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, { struct kvm_memory_slot *memslot; int r, i; - long n, base; + long base; + unsigned long n; unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); @@ -1815,7 +1816,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); base = memslot->base_gfn / BITS_PER_LONG; for (i = 0; i < n/sizeof(long); ++i) { @@ -1831,7 +1832,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { int r; - int n; + unsigned long n; struct kvm_memory_slot *memslot; int is_dirty = 0; @@ -1850,7 +1851,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (is_dirty) { kvm_flush_remote_tlbs(kvm); memslot = &kvm->memslots->memslots[log->slot]; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); } r = 0; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 25da07fd9f77..604af29b71ed 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1004,7 +1004,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_vcpu *vcpu; ulong ga, ga_end; int is_dirty = 0; - int r, n; + int r; + unsigned long n; mutex_lock(&kvm->slots_lock); @@ -1022,7 +1023,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, kvm_for_each_vcpu(n, vcpu, kvm) kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); } diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 9e2d9447f2ad..4827a3aeac7f 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -111,7 +111,7 @@ struct thread_info { #define THREAD_SHIFT PAGE_SHIFT #endif /* PAGE_SHIFT == 13 */ -#define PREEMPT_ACTIVE 0x4000000 +#define PREEMPT_ACTIVE 0x10000000 /* * macros/functions for gaining access to the thread information structure diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 454ce3a25273..830d70a3e20b 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -22,6 +22,7 @@ #include <linux/seq_file.h> #include <linux/ftrace.h> #include <linux/irq.h> +#include <linux/kmemleak.h> #include <asm/ptrace.h> #include <asm/processor.h> @@ -46,6 +47,7 @@ #include "entry.h" #include "cpumap.h" +#include "kstack.h" #define NUM_IVECS (IMAP_INR + 1) @@ -712,24 +714,6 @@ void ack_bad_irq(unsigned int virt_irq) void *hardirq_stack[NR_CPUS]; void *softirq_stack[NR_CPUS]; -static __attribute__((always_inline)) void *set_hardirq_stack(void) -{ - void *orig_sp, *sp = hardirq_stack[smp_processor_id()]; - - __asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp)); - if (orig_sp < sp || - orig_sp > (sp + THREAD_SIZE)) { - sp += THREAD_SIZE - 192 - STACK_BIAS; - __asm__ __volatile__("mov %0, %%sp" : : "r" (sp)); - } - - return orig_sp; -} -static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp) -{ - __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); -} - void __irq_entry handler_irq(int irq, struct pt_regs *regs) { unsigned long pstate, bucket_pa; diff --git a/arch/sparc/kernel/kstack.h b/arch/sparc/kernel/kstack.h index 5247283d1c03..53dfb92e09fb 100644 --- a/arch/sparc/kernel/kstack.h +++ b/arch/sparc/kernel/kstack.h @@ -61,4 +61,23 @@ check_magic: } +static inline __attribute__((always_inline)) void *set_hardirq_stack(void) +{ + void *orig_sp, *sp = hardirq_stack[smp_processor_id()]; + + __asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp)); + if (orig_sp < sp || + orig_sp > (sp + THREAD_SIZE)) { + sp += THREAD_SIZE - 192 - STACK_BIAS; + __asm__ __volatile__("mov %0, %%sp" : : "r" (sp)); + } + + return orig_sp; +} + +static inline __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp) +{ + __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); +} + #endif /* _KSTACK_H */ diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 75a3d1a25356..a4bd7ba74c89 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -23,6 +23,8 @@ #include <asm/ptrace.h> #include <asm/pcr.h> +#include "kstack.h" + /* We don't have a real NMI on sparc64, but we can fake one * up using profiling counter overflow interrupts and interrupt * levels. @@ -92,6 +94,7 @@ static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) { unsigned int sum, touched = 0; + void *orig_sp; clear_softint(1 << irq); @@ -99,6 +102,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) nmi_enter(); + orig_sp = set_hardirq_stack(); + if (notify_die(DIE_NMI, "nmi", regs, 0, pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) touched = 1; @@ -124,6 +129,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) pcr_ops->write(pcr_enable); } + restore_hardirq_stack(orig_sp); + nmi_exit(); } diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 83f1873c6c13..090b9e9ad5e3 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -130,7 +130,17 @@ rtrap_xcall: nop call trace_hardirqs_on nop - wrpr %l4, %pil + /* Do not actually set the %pil here. We will do that + * below after we clear PSTATE_IE in the %pstate register. + * If we re-enable interrupts here, we can recurse down + * the hardirq stack potentially endlessly, causing a + * stack overflow. + * + * It is tempting to put this test and trace_hardirqs_on + * call at the 'rt_continue' label, but that will not work + * as that path hits unconditionally and we do not want to + * execute this in NMI return paths, for example. + */ #endif rtrap_no_irq_enable: andcc %l1, TSTATE_PRIV, %l3 diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index ebce43018c49..c752c4c479bd 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -50,7 +50,7 @@ static inline enum direction decode_direction(unsigned int insn) } /* 16 = double-word, 8 = extra-word, 4 = word, 2 = half-word */ -static inline int decode_access_size(unsigned int insn) +static inline int decode_access_size(struct pt_regs *regs, unsigned int insn) { unsigned int tmp; @@ -66,7 +66,7 @@ static inline int decode_access_size(unsigned int insn) return 2; else { printk("Impossible unaligned trap. insn=%08x\n", insn); - die_if_kernel("Byte sized unaligned access?!?!", current_thread_info()->kregs); + die_if_kernel("Byte sized unaligned access?!?!", regs); /* GCC should never warn that control reaches the end * of this function without returning a value because @@ -286,7 +286,7 @@ static void log_unaligned(struct pt_regs *regs) asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) { enum direction dir = decode_direction(insn); - int size = decode_access_size(insn); + int size = decode_access_size(regs, insn); int orig_asi, asi; current_thread_info()->kern_una_regs = regs; diff --git a/arch/sparc/lib/mcount.S b/arch/sparc/lib/mcount.S index 3753e3c6e176..3ad6cbdc2163 100644 --- a/arch/sparc/lib/mcount.S +++ b/arch/sparc/lib/mcount.S @@ -34,7 +34,7 @@ mcount: cmp %g1, %g2 be,pn %icc, 1f mov %i7, %g3 - save %sp, -128, %sp + save %sp, -176, %sp mov %g3, %o1 jmpl %g1, %o7 mov %i7, %o0 @@ -56,7 +56,7 @@ mcount: nop 5: mov %i7, %g2 mov %fp, %g3 - save %sp, -128, %sp + save %sp, -176, %sp mov %g2, %l0 ba,pt %xcc, ftrace_graph_caller mov %g3, %l1 @@ -85,7 +85,7 @@ ftrace_caller: lduw [%g1 + %lo(function_trace_stop)], %g1 brnz,pn %g1, ftrace_stub mov %fp, %g3 - save %sp, -128, %sp + save %sp, -176, %sp mov %g2, %o1 mov %g2, %l0 mov %g3, %l1 @@ -120,7 +120,7 @@ ENTRY(ftrace_graph_caller) END(ftrace_graph_caller) ENTRY(return_to_handler) - save %sp, -128, %sp + save %sp, -176, %sp call ftrace_return_to_handler mov %fp, %o0 jmpl %o0 + 8, %g0 diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 59b4556a5b92..e790bc1fbfa3 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -626,7 +626,7 @@ ia32_sys_call_table: .quad stub32_sigreturn .quad stub32_clone /* 120 */ .quad sys_setdomainname - .quad sys_uname + .quad sys_newuname .quad sys_modify_ldt .quad compat_sys_adjtimex .quad sys32_mprotect /* 125 */ diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index e39e77168a37..e1a93be4fd44 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -14,6 +14,8 @@ #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif +#include <linux/uaccess.h> + extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl); @@ -42,8 +44,10 @@ static inline unsigned long rewind_frame_pointer(int n) get_bp(frame); #ifdef CONFIG_FRAME_POINTER - while (n--) - frame = frame->next_frame; + while (n--) { + if (probe_kernel_address(&frame->next_frame, frame)) + break; + } #endif return (unsigned long)frame; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 48aeee8eefb0..19a8906bcaa2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1490,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm, for_each_sp(pages, sp, parents, i) { kvm_mmu_zap_page(kvm, sp); mmu_pages_clear_parents(&parents); + zapped++; } - zapped += pages.nr; kvm_mmu_pages_init(parent, &parents, &pages); } @@ -1542,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) */ if (used_pages > kvm_nr_mmu_pages) { - while (used_pages > kvm_nr_mmu_pages) { + while (used_pages > kvm_nr_mmu_pages && + !list_empty(&kvm->arch.active_mmu_pages)) { struct kvm_mmu_page *page; page = container_of(kvm->arch.active_mmu_pages.prev, struct kvm_mmu_page, link); - kvm_mmu_zap_page(kvm, page); + used_pages -= kvm_mmu_zap_page(kvm, page); used_pages--; } + kvm_nr_mmu_pages = used_pages; kvm->arch.n_free_mmu_pages = 0; } else @@ -1596,7 +1598,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) && !sp->role.invalid) { pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); - kvm_mmu_zap_page(kvm, sp); + if (kvm_mmu_zap_page(kvm, sp)) + nn = bucket->first; } } } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 445c59411ed0..2ba58206812a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -706,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_svm; + err = -ENOMEM; page = alloc_page(GFP_KERNEL); - if (!page) { - err = -ENOMEM; + if (!page) goto uninit; - } - err = -ENOMEM; msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!msrpm_pages) - goto uninit; + goto free_page1; nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!nested_msrpm_pages) - goto uninit; - - svm->msrpm = page_address(msrpm_pages); - svm_vcpu_init_msrpm(svm->msrpm); + goto free_page2; hsave_page = alloc_page(GFP_KERNEL); if (!hsave_page) - goto uninit; + goto free_page3; + svm->nested.hsave = page_address(hsave_page); + svm->msrpm = page_address(msrpm_pages); + svm_vcpu_init_msrpm(svm->msrpm); + svm->nested.msrpm = page_address(nested_msrpm_pages); svm->vmcb = page_address(page); @@ -744,6 +743,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) return &svm->vcpu; +free_page3: + __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); +free_page2: + __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); +free_page1: + __free_page(page); uninit: kvm_vcpu_uninit(&svm->vcpu); free_svm: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 686492ed3079..bc933cfb4e66 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -77,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) +#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) + /* * These 2 parameters are used to config the controls for Pause-Loop Exiting: * ple_gap: upper bound on the amount of time between two successive @@ -131,7 +133,7 @@ struct vcpu_vmx { } host_state; struct { int vm86_active; - u8 save_iopl; + ulong save_rflags; struct kvm_save_segment { u16 selector; unsigned long base; @@ -818,18 +820,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { - unsigned long rflags; + unsigned long rflags, save_rflags; rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) - rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); + if (to_vmx(vcpu)->rmode.vm86_active) { + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + save_rflags = to_vmx(vcpu)->rmode.save_rflags; + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + } return rflags; } static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { - if (to_vmx(vcpu)->rmode.vm86_active) + if (to_vmx(vcpu)->rmode.vm86_active) { + to_vmx(vcpu)->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; + } vmcs_writel(GUEST_RFLAGS, rflags); } @@ -1483,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); flags = vmcs_readl(GUEST_RFLAGS); - flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); - flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); + flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; vmcs_writel(GUEST_RFLAGS, flags); vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | @@ -1557,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); flags = vmcs_readl(GUEST_RFLAGS); - vmx->rmode.save_iopl - = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; + vmx->rmode.save_rflags = flags; flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 24cd0ee896e9..3c4ca98ad27f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -433,8 +433,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #ifdef CONFIG_X86_64 if (cr0 & 0xffffffff00000000UL) { - printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", - cr0, kvm_read_cr0(vcpu)); kvm_inject_gp(vcpu, 0); return; } @@ -443,14 +441,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) cr0 &= ~CR0_RESERVED_BITS; if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { - printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); kvm_inject_gp(vcpu, 0); return; } if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { - printk(KERN_DEBUG "set_cr0: #GP, set PG flag " - "and a clear PE flag\n"); kvm_inject_gp(vcpu, 0); return; } @@ -461,15 +456,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) int cs_db, cs_l; if (!is_pae(vcpu)) { - printk(KERN_DEBUG "set_cr0: #GP, start paging " - "in long mode while PAE is disabled\n"); kvm_inject_gp(vcpu, 0); return; } kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); if (cs_l) { - printk(KERN_DEBUG "set_cr0: #GP, start paging " - "in long mode while CS.L == 1\n"); kvm_inject_gp(vcpu, 0); return; @@ -477,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } else #endif if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { - printk(KERN_DEBUG "set_cr0: #GP, pdptrs " - "reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } @@ -505,28 +494,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; if (cr4 & CR4_RESERVED_BITS) { - printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) { - printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " - "in long mode\n"); kvm_inject_gp(vcpu, 0); return; } } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { - printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (cr4 & X86_CR4_VMXE) { - printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); kvm_inject_gp(vcpu, 0); return; } @@ -547,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (is_long_mode(vcpu)) { if (cr3 & CR3_L_MODE_RESERVED_BITS) { - printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } } else { if (is_pae(vcpu)) { if (cr3 & CR3_PAE_RESERVED_BITS) { - printk(KERN_DEBUG - "set_cr3: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { - printk(KERN_DEBUG "set_cr3: #GP, pdptrs " - "reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } @@ -593,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if (cr8 & CR8_RESERVED_BITS) { - printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); kvm_inject_gp(vcpu, 0); return; } @@ -649,15 +627,12 @@ static u32 emulated_msrs[] = { static void set_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) { - printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", - efer); kvm_inject_gp(vcpu, 0); return; } if (is_paging(vcpu) && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { - printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); kvm_inject_gp(vcpu, 0); return; } @@ -667,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { - printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); kvm_inject_gp(vcpu, 0); return; } @@ -678,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { - printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); kvm_inject_gp(vcpu, 0); return; } @@ -967,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MC0_CTL + 4 * bank_num) { u32 offset = msr - MSR_IA32_MC0_CTL; - /* only 0 or all 1s can be written to IA32_MCi_CTL */ + /* only 0 or all 1s can be written to IA32_MCi_CTL + * some Linux kernels though clear bit 10 in bank 4 to + * workaround a BIOS/GART TBL issue on AMD K8s, ignore + * this to avoid an uncatched #GP in the guest + */ if ((offset & 0x3) == 0 && - data != 0 && data != ~(u64)0) + data != 0 && (data | (1 << 10)) != ~(u64)0) return -1; vcpu->arch.mce_banks[offset] = data; break; @@ -2635,8 +2612,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - int r, n, i; + int r, i; struct kvm_memory_slot *memslot; + unsigned long n; unsigned long is_dirty = 0; unsigned long *dirty_bitmap = NULL; @@ -2651,7 +2629,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); r = -ENOMEM; dirty_bitmap = vmalloc(n); @@ -4483,7 +4461,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_set_cr8(vcpu, kvm_run->cr8); if (vcpu->arch.pio.cur_count) { + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = complete_pio(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (r) goto out; } @@ -5146,6 +5126,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) int ret = 0; u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); + u32 desc_limit; old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); @@ -5168,7 +5149,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) } } - if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { + desc_limit = get_desc_limit(&nseg_desc); + if (!nseg_desc.p || + ((desc_limit < 0x67 && (nseg_desc.type & 8)) || + desc_limit < 0x2b)) { kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); return 1; } diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index c9bc896d68af..90b199f97bec 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -1026,14 +1026,16 @@ static ssize_t cmm_read(struct file *filp, __user char *buf, size_t count, xoutb(0, REG_FLAGS1(iobase)); /* clear detectCMM */ /* last check before exit */ - if (!io_detect_cm4000(iobase, dev)) - count = -ENODEV; + if (!io_detect_cm4000(iobase, dev)) { + rc = -ENODEV; + goto release_io; + } if (test_bit(IS_INVREV, &dev->flags) && count > 0) str_invert_revert(dev->rbuf, count); if (copy_to_user(buf, dev->rbuf, count)) - return -EFAULT; + rc = -EFAULT; release_io: clear_bit(LOCK_IO, &dev->flags); diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index b743411d8144..a0c365f2e521 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -516,8 +516,6 @@ void drm_put_dev(struct drm_device *dev) } driver = dev->driver; - drm_vblank_cleanup(dev); - drm_lastclose(dev); if (drm_core_has_MTRR(dev) && drm_core_has_AGP(dev) && @@ -537,6 +535,8 @@ void drm_put_dev(struct drm_device *dev) dev->agp = NULL; } + drm_vblank_cleanup(dev); + list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head) drm_rmmap(dev, r_list->map); drm_ht_remove(&dev->map_hash); diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index bcec2d79636e..1d569830ed99 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -908,11 +908,16 @@ static void atom_op_shl(atom_exec_context *ctx, int *ptr, int arg) uint8_t attr = U8((*ptr)++), shift; uint32_t saved, dst; int dptr = *ptr; + uint32_t dst_align = atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3]; SDEBUG(" dst: "); dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1); + /* op needs to full dst value */ + dst = saved; shift = atom_get_src(ctx, attr, ptr); SDEBUG(" shift: %d\n", shift); dst <<= shift; + dst &= atom_arg_mask[dst_align]; + dst >>= atom_arg_shift[dst_align]; SDEBUG(" dst: "); atom_put_dst(ctx, arg, attr, &dptr, dst, saved); } @@ -922,11 +927,16 @@ static void atom_op_shr(atom_exec_context *ctx, int *ptr, int arg) uint8_t attr = U8((*ptr)++), shift; uint32_t saved, dst; int dptr = *ptr; + uint32_t dst_align = atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3]; SDEBUG(" dst: "); dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1); + /* op needs to full dst value */ + dst = saved; shift = atom_get_src(ctx, attr, ptr); SDEBUG(" shift: %d\n", shift); dst >>= shift; + dst &= atom_arg_mask[dst_align]; + dst >>= atom_arg_shift[dst_align]; SDEBUG(" dst: "); atom_put_dst(ctx, arg, attr, &dptr, dst, saved); } diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index fd4ef6d18849..a87990b3ae84 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -521,6 +521,10 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, /* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */ if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1) adjusted_clock = mode->clock * 2; + if (radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT)) { + pll->algo = PLL_ALGO_LEGACY; + pll->flags |= RADEON_PLL_PREFER_CLOSEST_LOWER; + } } else { if (encoder->encoder_type != DRM_MODE_ENCODER_DAC) pll->flags |= RADEON_PLL_NO_ODD_POST_DIV; diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index c9580497ede4..d7388fdb6d0b 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2891,7 +2891,7 @@ static int r100_cs_track_texture_check(struct radeon_device *rdev, { struct radeon_bo *robj; unsigned long size; - unsigned u, i, w, h; + unsigned u, i, w, h, d; int ret; for (u = 0; u < track->num_texture; u++) { @@ -2923,20 +2923,25 @@ static int r100_cs_track_texture_check(struct radeon_device *rdev, h = h / (1 << i); if (track->textures[u].roundup_h) h = roundup_pow_of_two(h); + if (track->textures[u].tex_coord_type == 1) { + d = (1 << track->textures[u].txdepth) / (1 << i); + if (!d) + d = 1; + } else { + d = 1; + } if (track->textures[u].compress_format) { - size += r100_track_compress_size(track->textures[u].compress_format, w, h); + size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; /* compressed textures are block based */ } else - size += w * h; + size += w * h * d; } size *= track->textures[u].cpp; switch (track->textures[u].tex_coord_type) { case 0: - break; case 1: - size *= (1 << track->textures[u].txdepth); break; case 2: if (track->separate_cube) { @@ -3007,7 +3012,11 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) } } prim_walk = (track->vap_vf_cntl >> 4) & 0x3; - nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; + if (track->vap_vf_cntl & (1 << 14)) { + nverts = track->vap_alt_nverts; + } else { + nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; + } switch (prim_walk) { case 1: for (i = 0; i < track->num_arrays; i++) { diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h index b27a6999d219..fadfe68de9cc 100644 --- a/drivers/gpu/drm/radeon/r100_track.h +++ b/drivers/gpu/drm/radeon/r100_track.h @@ -64,6 +64,7 @@ struct r100_cs_track { unsigned maxy; unsigned vtx_size; unsigned vap_vf_cntl; + unsigned vap_alt_nverts; unsigned immd_dwords; unsigned num_arrays; unsigned max_indx; diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 2b9affe754ce..bd75f99bd65e 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -730,6 +730,12 @@ static int r300_packet0_check(struct radeon_cs_parser *p, /* VAP_VF_MAX_VTX_INDX */ track->max_indx = idx_value & 0x00FFFFFFUL; break; + case 0x2088: + /* VAP_ALT_NUM_VERTICES - only valid on r500 */ + if (p->rdev->family < CHIP_RV515) + goto fail; + track->vap_alt_nverts = idx_value & 0xFFFFFF; + break; case 0x43E4: /* SC_SCISSOR1 */ track->maxy = ((idx_value >> 13) & 0x1FFF) + 1; @@ -767,7 +773,6 @@ static int r300_packet0_check(struct radeon_cs_parser *p, tmp = idx_value & ~(0x7 << 16); tmp |= tile_flags; ib[idx] = tmp; - i = (reg - 0x4E38) >> 2; track->cb[i].pitch = idx_value & 0x3FFE; switch (((idx_value >> 21) & 0xF)) { @@ -1052,11 +1057,13 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; /* fallthrough do not move */ default: - printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", - reg, idx); - return -EINVAL; + goto fail; } return 0; +fail: + printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", + reg, idx); + return -EINVAL; } static int r300_packet3_check(struct radeon_cs_parser *p, diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c index dac7042b797e..1d898051c631 100644 --- a/drivers/gpu/drm/radeon/r600_audio.c +++ b/drivers/gpu/drm/radeon/r600_audio.c @@ -35,7 +35,7 @@ */ static int r600_audio_chipset_supported(struct radeon_device *rdev) { - return rdev->family >= CHIP_R600 + return (rdev->family >= CHIP_R600 && rdev->family < CHIP_CEDAR) || rdev->family == CHIP_RS600 || rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740; diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 029fa1406d1d..2616b822ba68 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -314,6 +314,9 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod struct radeon_device *rdev = dev->dev_private; uint32_t offset = to_radeon_encoder(encoder)->hdmi_offset; + if (ASIC_IS_DCE4(rdev)) + return; + if (!offset) return; @@ -484,6 +487,9 @@ void r600_hdmi_enable(struct drm_encoder *encoder) struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + if (ASIC_IS_DCE4(rdev)) + return; + if (!radeon_encoder->hdmi_offset) { r600_hdmi_assign_block(encoder); if (!radeon_encoder->hdmi_offset) { @@ -525,6 +531,9 @@ void r600_hdmi_disable(struct drm_encoder *encoder) struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + if (ASIC_IS_DCE4(rdev)) + return; + if (!radeon_encoder->hdmi_offset) { dev_err(rdev->dev, "Disabling not enabled HDMI\n"); return; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 3fba50540f72..1331351c5178 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -162,12 +162,14 @@ radeon_connector_analog_encoder_conflict_solve(struct drm_connector *connector, { struct drm_device *dev = connector->dev; struct drm_connector *conflict; + struct radeon_connector *radeon_conflict; int i; list_for_each_entry(conflict, &dev->mode_config.connector_list, head) { if (conflict == connector) continue; + radeon_conflict = to_radeon_connector(conflict); for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { if (conflict->encoder_ids[i] == 0) break; @@ -177,6 +179,9 @@ radeon_connector_analog_encoder_conflict_solve(struct drm_connector *connector, if (conflict->status != connector_status_connected) continue; + if (radeon_conflict->use_digital) + continue; + if (priority == true) { DRM_INFO("1: conflicting encoders switching off %s\n", drm_get_connector_name(conflict)); DRM_INFO("in favor of %s\n", drm_get_connector_name(connector)); @@ -287,6 +292,7 @@ int radeon_connector_set_property(struct drm_connector *connector, struct drm_pr if (property == rdev->mode_info.coherent_mode_property) { struct radeon_encoder_atom_dig *dig; + bool new_coherent_mode; /* need to find digital encoder on connector */ encoder = radeon_find_encoder(connector, DRM_MODE_ENCODER_TMDS); @@ -299,8 +305,11 @@ int radeon_connector_set_property(struct drm_connector *connector, struct drm_pr return 0; dig = radeon_encoder->enc_priv; - dig->coherent_mode = val ? true : false; - radeon_property_change_mode(&radeon_encoder->base); + new_coherent_mode = val ? true : false; + if (dig->coherent_mode != new_coherent_mode) { + dig->coherent_mode = new_coherent_mode; + radeon_property_change_mode(&radeon_encoder->base); + } } if (property == rdev->mode_info.tv_std_property) { diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index bddf17f97da8..7b629e305560 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -36,6 +36,54 @@ #include "radeon.h" #include "atom.h" +static const char radeon_family_name[][16] = { + "R100", + "RV100", + "RS100", + "RV200", + "RS200", + "R200", + "RV250", + "RS300", + "RV280", + "R300", + "R350", + "RV350", + "RV380", + "R420", + "R423", + "RV410", + "RS400", + "RS480", + "RS600", + "RS690", + "RS740", + "RV515", + "R520", + "RV530", + "RV560", + "RV570", + "R580", + "R600", + "RV610", + "RV630", + "RV670", + "RV620", + "RV635", + "RS780", + "RS880", + "RV770", + "RV730", + "RV710", + "RV740", + "CEDAR", + "REDWOOD", + "JUNIPER", + "CYPRESS", + "HEMLOCK", + "LAST", +}; + /* * Clear GPU surface registers. */ @@ -526,7 +574,6 @@ int radeon_device_init(struct radeon_device *rdev, int r; int dma_bits; - DRM_INFO("radeon: Initializing kernel modesetting.\n"); rdev->shutdown = false; rdev->dev = &pdev->dev; rdev->ddev = ddev; @@ -538,6 +585,10 @@ int radeon_device_init(struct radeon_device *rdev, rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; rdev->gpu_lockup = false; rdev->accel_working = false; + + DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X).\n", + radeon_family_name[rdev->family], pdev->vendor, pdev->device); + /* mutex initialization are all done here so we * can recall function without having locking issues */ mutex_init(&rdev->cs_mutex); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 055a51732dcb..4b05563d99e1 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -43,9 +43,10 @@ * - 2.0.0 - initial interface * - 2.1.0 - add square tiling interface * - 2.2.0 - add r6xx/r7xx const buffer support + * - 2.3.0 - add MSPOS + 3D texture + r500 VAP regs */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 2 +#define KMS_DRIVER_MINOR 3 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index c52fc3080b67..30293bec0801 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -865,6 +865,8 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t else if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) { if (dig->coherent_mode) args.v3.acConfig.fCoherentMode = 1; + if (radeon_encoder->pixel_clock > 165000) + args.v3.acConfig.fDualLinkConnector = 1; } } else if (ASIC_IS_DCE32(rdev)) { args.v2.acConfig.ucEncoderSel = dig->dig_encoder; @@ -888,6 +890,8 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t else if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) { if (dig->coherent_mode) args.v2.acConfig.fCoherentMode = 1; + if (radeon_encoder->pixel_clock > 165000) + args.v2.acConfig.fDualLinkConnector = 1; } } else { args.v1.ucConfig = ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL; @@ -1373,8 +1377,12 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, case ENCODER_OBJECT_ID_INTERNAL_DAC2: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2: atombios_dac_setup(encoder, ATOM_ENABLE); - if (radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) - atombios_tv_setup(encoder, ATOM_ENABLE); + if (radeon_encoder->devices & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) { + if (radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) + atombios_tv_setup(encoder, ATOM_ENABLE); + else + atombios_tv_setup(encoder, ATOM_DISABLE); + } break; } atombios_apply_encoder_quirks(encoder, adjusted_mode); diff --git a/drivers/gpu/drm/radeon/radeon_family.h b/drivers/gpu/drm/radeon/radeon_family.h index 93c7d5d41914..e329066dcabd 100644 --- a/drivers/gpu/drm/radeon/radeon_family.h +++ b/drivers/gpu/drm/radeon/radeon_family.h @@ -36,7 +36,7 @@ * Radeon chip families */ enum radeon_family { - CHIP_R100, + CHIP_R100 = 0, CHIP_RV100, CHIP_RS100, CHIP_RV200, @@ -99,4 +99,5 @@ enum radeon_chip_flags { RADEON_IS_PCI = 0x00800000UL, RADEON_IS_IGPGART = 0x01000000UL, }; + #endif diff --git a/drivers/gpu/drm/radeon/reg_srcs/r300 b/drivers/gpu/drm/radeon/reg_srcs/r300 index 19c4663fa9c6..1e97b2d129fd 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/r300 +++ b/drivers/gpu/drm/radeon/reg_srcs/r300 @@ -125,6 +125,8 @@ r300 0x4f60 0x4000 GB_VAP_RASTER_VTX_FMT_0 0x4004 GB_VAP_RASTER_VTX_FMT_1 0x4008 GB_ENABLE +0x4010 GB_MSPOS0 +0x4014 GB_MSPOS1 0x401C GB_SELECT 0x4020 GB_AA_CONFIG 0x4024 GB_FIFO_SIZE diff --git a/drivers/gpu/drm/radeon/reg_srcs/r420 b/drivers/gpu/drm/radeon/reg_srcs/r420 index 989f7a020832..e958980d00f1 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/r420 +++ b/drivers/gpu/drm/radeon/reg_srcs/r420 @@ -125,6 +125,8 @@ r420 0x4f60 0x4000 GB_VAP_RASTER_VTX_FMT_0 0x4004 GB_VAP_RASTER_VTX_FMT_1 0x4008 GB_ENABLE +0x4010 GB_MSPOS0 +0x4014 GB_MSPOS1 0x401C GB_SELECT 0x4020 GB_AA_CONFIG 0x4024 GB_FIFO_SIZE diff --git a/drivers/gpu/drm/radeon/reg_srcs/rs600 b/drivers/gpu/drm/radeon/reg_srcs/rs600 index 6801b865d1c4..83e8bc0c2bb2 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/rs600 +++ b/drivers/gpu/drm/radeon/reg_srcs/rs600 @@ -125,6 +125,8 @@ rs600 0x6d40 0x4000 GB_VAP_RASTER_VTX_FMT_0 0x4004 GB_VAP_RASTER_VTX_FMT_1 0x4008 GB_ENABLE +0x4010 GB_MSPOS0 +0x4014 GB_MSPOS1 0x401C GB_SELECT 0x4020 GB_AA_CONFIG 0x4024 GB_FIFO_SIZE diff --git a/drivers/gpu/drm/radeon/reg_srcs/rv515 b/drivers/gpu/drm/radeon/reg_srcs/rv515 index 38abf63bf2cd..1e46233985eb 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/rv515 +++ b/drivers/gpu/drm/radeon/reg_srcs/rv515 @@ -35,6 +35,7 @@ rv515 0x6d40 0x1DA8 VAP_VPORT_ZSCALE 0x1DAC VAP_VPORT_ZOFFSET 0x2080 VAP_CNTL +0x208C VAP_INDEX_OFFSET 0x2090 VAP_OUT_VTX_FMT_0 0x2094 VAP_OUT_VTX_FMT_1 0x20B0 VAP_VTE_CNTL @@ -158,6 +159,8 @@ rv515 0x6d40 0x4000 GB_VAP_RASTER_VTX_FMT_0 0x4004 GB_VAP_RASTER_VTX_FMT_1 0x4008 GB_ENABLE +0x4010 GB_MSPOS0 +0x4014 GB_MSPOS1 0x401C GB_SELECT 0x4020 GB_AA_CONFIG 0x4024 GB_FIFO_SIZE diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index abf824c2123d..a81bc7a21e14 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -159,7 +159,7 @@ void rs600_gart_tlb_flush(struct radeon_device *rdev) WREG32_MC(R_000100_MC_PT0_CNTL, tmp); tmp = RREG32_MC(R_000100_MC_PT0_CNTL); - tmp |= S_000100_INVALIDATE_ALL_L1_TLBS(1) & S_000100_INVALIDATE_L2_CACHE(1); + tmp |= S_000100_INVALIDATE_ALL_L1_TLBS(1) | S_000100_INVALIDATE_L2_CACHE(1); WREG32_MC(R_000100_MC_PT0_CNTL, tmp); tmp = RREG32_MC(R_000100_MC_PT0_CNTL); diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index 0be15c70c16d..47a5ffec55a3 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -14,11 +14,6 @@ */ #include "gigaset.h" - -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/timer.h> #include <linux/usb.h> #include <linux/module.h> #include <linux/moduleparam.h> diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c index eb7e27105a82..964a55fb1486 100644 --- a/drivers/isdn/gigaset/capi.c +++ b/drivers/isdn/gigaset/capi.c @@ -12,8 +12,6 @@ */ #include "gigaset.h" -#include <linux/slab.h> -#include <linux/ctype.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/isdn/capilli.h> diff --git a/drivers/isdn/gigaset/common.c b/drivers/isdn/gigaset/common.c index 0b39b387c125..f6f45f221920 100644 --- a/drivers/isdn/gigaset/common.c +++ b/drivers/isdn/gigaset/common.c @@ -14,10 +14,8 @@ */ #include "gigaset.h" -#include <linux/ctype.h> #include <linux/module.h> #include <linux/moduleparam.h> -#include <linux/slab.h> /* Version Information */ #define DRIVER_AUTHOR "Hansjoerg Lipp <hjlipp@web.de>, Tilman Schmidt <tilman@imap.cc>, Stefan Eilers" diff --git a/drivers/isdn/gigaset/gigaset.h b/drivers/isdn/gigaset/gigaset.h index 9ef5b0463fd5..05947f9c1849 100644 --- a/drivers/isdn/gigaset/gigaset.h +++ b/drivers/isdn/gigaset/gigaset.h @@ -20,11 +20,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> +#include <linux/sched.h> #include <linux/compiler.h> #include <linux/types.h> +#include <linux/ctype.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/usb.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/ppp_defs.h> diff --git a/drivers/isdn/gigaset/i4l.c b/drivers/isdn/gigaset/i4l.c index c99fb9790a13..c22e5ace8276 100644 --- a/drivers/isdn/gigaset/i4l.c +++ b/drivers/isdn/gigaset/i4l.c @@ -15,7 +15,6 @@ #include "gigaset.h" #include <linux/isdnif.h> -#include <linux/slab.h> #define HW_HDR_LEN 2 /* Header size used to store ack info */ diff --git a/drivers/isdn/gigaset/interface.c b/drivers/isdn/gigaset/interface.c index f0dc6c9cc283..c9f28dd40d5c 100644 --- a/drivers/isdn/gigaset/interface.c +++ b/drivers/isdn/gigaset/interface.c @@ -13,7 +13,6 @@ #include "gigaset.h" #include <linux/gigaset_dev.h> -#include <linux/tty.h> #include <linux/tty_flip.h> /*** our ioctls ***/ diff --git a/drivers/isdn/gigaset/proc.c b/drivers/isdn/gigaset/proc.c index b69f73a0668f..b943efbff44d 100644 --- a/drivers/isdn/gigaset/proc.c +++ b/drivers/isdn/gigaset/proc.c @@ -14,7 +14,6 @@ */ #include "gigaset.h" -#include <linux/ctype.h> static ssize_t show_cidmode(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 8b0afd203a07..e96c0586886c 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -11,13 +11,10 @@ */ #include "gigaset.h" - #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/platform_device.h> -#include <linux/tty.h> #include <linux/completion.h> -#include <linux/slab.h> /* Version Information */ #define DRIVER_AUTHOR "Tilman Schmidt" diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index 9430a2bbb523..76dbb20f3065 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -16,10 +16,6 @@ */ #include "gigaset.h" - -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/slab.h> #include <linux/usb.h> #include <linux/module.h> #include <linux/moduleparam.h> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e3e9a36ea3b7..20e48401910e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1650,8 +1650,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, int previous, int *dd_idx, struct stripe_head *sh) { - long stripe; - unsigned long chunk_number; + sector_t stripe; + sector_t chunk_number; unsigned int chunk_offset; int pd_idx, qd_idx; int ddf_layout = 0; @@ -1671,17 +1671,12 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, */ chunk_offset = sector_div(r_sector, sectors_per_chunk); chunk_number = r_sector; - BUG_ON(r_sector != chunk_number); /* * Compute the stripe number */ - stripe = chunk_number / data_disks; - - /* - * Compute the data disk and parity disk indexes inside the stripe - */ - *dd_idx = chunk_number % data_disks; + stripe = chunk_number; + *dd_idx = sector_div(stripe, data_disks); /* * Select the parity disk based on the user selected algorithm. @@ -1870,14 +1865,14 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) : conf->algorithm; sector_t stripe; int chunk_offset; - int chunk_number, dummy1, dd_idx = i; + sector_t chunk_number; + int dummy1, dd_idx = i; sector_t r_sector; struct stripe_head sh2; chunk_offset = sector_div(new_sector, sectors_per_chunk); stripe = new_sector; - BUG_ON(new_sector != stripe); if (i == sh->pd_idx) return 0; @@ -1970,7 +1965,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) } chunk_number = stripe * data_disks + i; - r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset; + r_sector = chunk_number * sectors_per_chunk + chunk_offset; check = raid5_compute_sector(conf, r_sector, previous, &dummy1, &sh2); diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 73b260c3c654..5c98f7c22425 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -5899,7 +5899,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i /* Limit the number of tx's outstanding for hw bug */ if (id->driver_data & DEV_NEED_TX_LIMIT) { np->tx_limit = 1; - if ((id->driver_data & DEV_NEED_TX_LIMIT2) && + if (((id->driver_data & DEV_NEED_TX_LIMIT2) == DEV_NEED_TX_LIMIT2) && pci_dev->revision >= 0xA2) np->tx_limit = 0; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 96c39bddc78c..43265207d463 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -387,6 +387,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) } } + /* Orphan the skb - required as we might hang on to it + * for indefinite time. */ + skb_orphan(skb); + /* Enqueue packet */ skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb); dev->trans_start = jiffies; diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index b9b9d6b01c0b..941f053e650e 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -628,9 +628,15 @@ static void ppp_stop(struct net_device *dev) ppp_cp_event(dev, PID_LCP, STOP, 0, 0, 0, NULL); } +static void ppp_close(struct net_device *dev) +{ + ppp_tx_flush(); +} + static struct hdlc_proto proto = { .start = ppp_start, .stop = ppp_stop, + .close = ppp_close, .type_trans = ppp_type_trans, .ioctl = ppp_ioctl, .netif_rx = ppp_rx, diff --git a/drivers/net/wireless/iwlwifi/iwl-calib.c b/drivers/net/wireless/iwlwifi/iwl-calib.c index de3b3f403d1f..8b516c5ff0bb 100644 --- a/drivers/net/wireless/iwlwifi/iwl-calib.c +++ b/drivers/net/wireless/iwlwifi/iwl-calib.c @@ -808,6 +808,18 @@ void iwl_chain_noise_calibration(struct iwl_priv *priv, } } + /* + * The above algorithm sometimes fails when the ucode + * reports 0 for all chains. It's not clear why that + * happens to start with, but it is then causing trouble + * because this can make us enable more chains than the + * hardware really has. + * + * To be safe, simply mask out any chains that we know + * are not on the device. + */ + active_chains &= priv->hw_params.valid_rx_ant; + num_tx_chains = 0; for (i = 0; i < NUM_RX_CHAINS; i++) { /* loops on all the bits of diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index f230f6543bff..854959cada3a 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -1484,6 +1484,11 @@ int pccard_validate_cis(struct pcmcia_socket *s, unsigned int *info) if (!s) return -EINVAL; + if (s->functions) { + WARN_ON(1); + return -EINVAL; + } + /* We do not want to validate the CIS cache... */ mutex_lock(&s->ops_mutex); destroy_cis_cache(s); @@ -1639,7 +1644,7 @@ static ssize_t pccard_show_cis(struct kobject *kobj, count = 0; else { struct pcmcia_socket *s; - unsigned int chains; + unsigned int chains = 1; if (off + count > size) count = size - off; @@ -1648,7 +1653,7 @@ static ssize_t pccard_show_cis(struct kobject *kobj, if (!(s->state & SOCKET_PRESENT)) return -ENODEV; - if (pccard_validate_cis(s, &chains)) + if (!s->functions && pccard_validate_cis(s, &chains)) return -EIO; if (!chains) return -ENODATA; diff --git a/drivers/pcmcia/db1xxx_ss.c b/drivers/pcmcia/db1xxx_ss.c index 6206408e196c..2d48196a48cd 100644 --- a/drivers/pcmcia/db1xxx_ss.c +++ b/drivers/pcmcia/db1xxx_ss.c @@ -166,8 +166,10 @@ static int db1x_pcmcia_setup_irqs(struct db1x_pcmcia_sock *sock) ret = request_irq(sock->insert_irq, db1200_pcmcia_cdirq, IRQF_DISABLED, "pcmcia_insert", sock); - if (ret) + if (ret) { + local_irq_restore(flags); goto out1; + } ret = request_irq(sock->eject_irq, db1200_pcmcia_cdirq, IRQF_DISABLED, "pcmcia_eject", sock); diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index cb6036d89e59..4014cf8e4a26 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -687,12 +687,10 @@ static void pcmcia_requery(struct pcmcia_socket *s) new_funcs = mfc.nfn; else new_funcs = 1; - if (old_funcs > new_funcs) { + if (old_funcs != new_funcs) { + /* we need to re-start */ pcmcia_card_remove(s, NULL); pcmcia_card_add(s); - } else if (new_funcs > old_funcs) { - s->functions = new_funcs; - pcmcia_device_add(s, 1); } } @@ -728,6 +726,8 @@ static int pcmcia_load_firmware(struct pcmcia_device *dev, char * filename) struct pcmcia_socket *s = dev->socket; const struct firmware *fw; int ret = -ENOMEM; + cistpl_longlink_mfc_t mfc; + int old_funcs, new_funcs = 1; if (!filename) return -EINVAL; @@ -750,6 +750,14 @@ static int pcmcia_load_firmware(struct pcmcia_device *dev, char * filename) goto release; } + /* we need to re-start if the number of functions changed */ + old_funcs = s->functions; + if (!pccard_read_tuple(s, BIND_FN_ALL, CISTPL_LONGLINK_MFC, + &mfc)) + new_funcs = mfc.nfn; + + if (old_funcs != new_funcs) + ret = -EBUSY; /* update information */ pcmcia_device_query(dev); @@ -858,10 +866,8 @@ static inline int pcmcia_devmatch(struct pcmcia_device *dev, if (did->match_flags & PCMCIA_DEV_ID_MATCH_FAKE_CIS) { dev_dbg(&dev->dev, "device needs a fake CIS\n"); if (!dev->socket->fake_cis) - pcmcia_load_firmware(dev, did->cisfile); - - if (!dev->socket->fake_cis) - return 0; + if (pcmcia_load_firmware(dev, did->cisfile)) + return 0; } if (did->match_flags & PCMCIA_DEV_ID_MATCH_ANONYMOUS) { diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index caec1dee2a4b..7c3d03bb4f30 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -755,12 +755,12 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) else printk(KERN_WARNING "pcmcia: Driver needs updating to support IRQ sharing.\n"); -#ifdef CONFIG_PCMCIA_PROBE - - if (s->irq.AssignedIRQ != 0) { - /* If the interrupt is already assigned, it must be the same */ + /* If the interrupt is already assigned, it must be the same */ + if (s->irq.AssignedIRQ != 0) irq = s->irq.AssignedIRQ; - } else { + +#ifdef CONFIG_PCMCIA_PROBE + if (!irq) { int try; u32 mask = s->irq_mask; void *data = p_dev; /* something unique to this device */ diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c index 559069a80a3b..a6eb7b59ba9f 100644 --- a/drivers/pcmcia/rsrc_nonstatic.c +++ b/drivers/pcmcia/rsrc_nonstatic.c @@ -214,7 +214,7 @@ static void do_io_probe(struct pcmcia_socket *s, unsigned int base, return; } for (i = base, most = 0; i < base+num; i += 8) { - res = claim_region(NULL, i, 8, IORESOURCE_IO, "PCMCIA ioprobe"); + res = claim_region(s, i, 8, IORESOURCE_IO, "PCMCIA ioprobe"); if (!res) continue; hole = inb(i); @@ -231,9 +231,14 @@ static void do_io_probe(struct pcmcia_socket *s, unsigned int base, bad = any = 0; for (i = base; i < base+num; i += 8) { - res = claim_region(NULL, i, 8, IORESOURCE_IO, "PCMCIA ioprobe"); - if (!res) + res = claim_region(s, i, 8, IORESOURCE_IO, "PCMCIA ioprobe"); + if (!res) { + if (!any) + printk(" excluding"); + if (!bad) + bad = any = i; continue; + } for (j = 0; j < 8; j++) if (inb(i+j) != most) break; @@ -253,6 +258,7 @@ static void do_io_probe(struct pcmcia_socket *s, unsigned int base, } if (bad) { if ((num > 16) && (bad == base) && (i == base+num)) { + sub_interval(&s_data->io_db, bad, i-bad); printk(" nothing: probe failed.\n"); return; } else { @@ -804,7 +810,7 @@ static int adjust_memory(struct pcmcia_socket *s, unsigned int action, unsigned static int adjust_io(struct pcmcia_socket *s, unsigned int action, unsigned long start, unsigned long end) { struct socket_data *data = s->resource_data; - unsigned long size = end - start + 1; + unsigned long size; int ret = 0; #if defined(CONFIG_X86) @@ -814,6 +820,8 @@ static int adjust_io(struct pcmcia_socket *s, unsigned int action, unsigned long start = 0x100; #endif + size = end - start + 1; + if (end < start) return -EINVAL; diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index 175d202ab37e..8cfa5b12ea7a 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -105,6 +105,10 @@ struct serial_cfg_mem { * manfid 0x0160, 0x0104 * This card appears to have a 14.7456MHz clock. */ +/* Generic Modem: MD55x (GPRS/EDGE) have + * Elan VPU16551 UART with 14.7456MHz oscillator + * manfid 0x015D, 0x4C45 + */ static void quirk_setup_brainboxes_0104(struct pcmcia_device *link, struct uart_port *port) { port->uartclk = 14745600; @@ -196,6 +200,11 @@ static const struct serial_quirk quirks[] = { .multi = -1, .setup = quirk_setup_brainboxes_0104, }, { + .manfid = 0x015D, + .prodid = 0x4C45, + .multi = -1, + .setup = quirk_setup_brainboxes_0104, + }, { .manfid = MANFID_IBM, .prodid = ~0, .multi = -1, diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 5e813a816ce4..b3feddc4f7d6 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -138,9 +138,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) { struct afs_super_info *super; struct vfsmount *mnt; - struct page *page = NULL; + struct page *page; size_t size; - char *buf, *devname = NULL, *options = NULL; + char *buf, *devname, *options; int ret; _enter("{%s}", mntpt->d_name.name); @@ -150,22 +150,22 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) ret = -EINVAL; size = mntpt->d_inode->i_size; if (size > PAGE_SIZE - 1) - goto error; + goto error_no_devname; ret = -ENOMEM; devname = (char *) get_zeroed_page(GFP_KERNEL); if (!devname) - goto error; + goto error_no_devname; options = (char *) get_zeroed_page(GFP_KERNEL); if (!options) - goto error; + goto error_no_options; /* read the contents of the AFS special symlink */ page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); if (IS_ERR(page)) { ret = PTR_ERR(page); - goto error; + goto error_no_page; } ret = -EIO; @@ -196,12 +196,12 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) return mnt; error: - if (page) - page_cache_release(page); - if (devname) - free_page((unsigned long) devname); - if (options) - free_page((unsigned long) options); + page_cache_release(page); +error_no_page: + free_page((unsigned long) options); +error_no_options: + free_page((unsigned long) devname); +error_no_devname: _leave(" = %d", ret); return ERR_PTR(ret); } diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index efb2b9400391..1cc087635a5e 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -382,8 +382,8 @@ out: static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, struct ecryptfs_crypt_stat *crypt_stat) { - (*offset) = (crypt_stat->num_header_bytes_at_front - + (crypt_stat->extent_size * extent_num)); + (*offset) = ecryptfs_lower_header_size(crypt_stat) + + (crypt_stat->extent_size * extent_num); } /** @@ -835,13 +835,13 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat) set_extent_mask_and_shift(crypt_stat); crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - crypt_stat->num_header_bytes_at_front = 0; + crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; else { if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) - crypt_stat->num_header_bytes_at_front = + crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; else - crypt_stat->num_header_bytes_at_front = PAGE_CACHE_SIZE; + crypt_stat->metadata_size = PAGE_CACHE_SIZE; } } @@ -1108,9 +1108,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written) (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; } -static void -write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, - size_t *written) +void ecryptfs_write_crypt_stat_flags(char *page_virt, + struct ecryptfs_crypt_stat *crypt_stat, + size_t *written) { u32 flags = 0; int i; @@ -1238,8 +1238,7 @@ ecryptfs_write_header_metadata(char *virt, header_extent_size = (u32)crypt_stat->extent_size; num_header_extents_at_front = - (u16)(crypt_stat->num_header_bytes_at_front - / crypt_stat->extent_size); + (u16)(crypt_stat->metadata_size / crypt_stat->extent_size); put_unaligned_be32(header_extent_size, virt); virt += 4; put_unaligned_be16(num_header_extents_at_front, virt); @@ -1292,7 +1291,8 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max, offset = ECRYPTFS_FILE_SIZE_BYTES; write_ecryptfs_marker((page_virt + offset), &written); offset += written; - write_ecryptfs_flags((page_virt + offset), crypt_stat, &written); + ecryptfs_write_crypt_stat_flags((page_virt + offset), crypt_stat, + &written); offset += written; ecryptfs_write_header_metadata((page_virt + offset), crypt_stat, &written); @@ -1382,7 +1382,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) rc = -EINVAL; goto out; } - virt_len = crypt_stat->num_header_bytes_at_front; + virt_len = crypt_stat->metadata_size; order = get_order(virt_len); /* Released in this function */ virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order); @@ -1428,16 +1428,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, header_extent_size = get_unaligned_be32(virt); virt += sizeof(__be32); num_header_extents_at_front = get_unaligned_be16(virt); - crypt_stat->num_header_bytes_at_front = - (((size_t)num_header_extents_at_front - * (size_t)header_extent_size)); + crypt_stat->metadata_size = (((size_t)num_header_extents_at_front + * (size_t)header_extent_size)); (*bytes_read) = (sizeof(__be32) + sizeof(__be16)); if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) - && (crypt_stat->num_header_bytes_at_front + && (crypt_stat->metadata_size < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { rc = -EINVAL; printk(KERN_WARNING "Invalid header size: [%zd]\n", - crypt_stat->num_header_bytes_at_front); + crypt_stat->metadata_size); } return rc; } @@ -1452,8 +1451,7 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, */ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) { - crypt_stat->num_header_bytes_at_front = - ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; + crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; } /** @@ -1607,6 +1605,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) ecryptfs_dentry, ECRYPTFS_VALIDATE_HEADER_SIZE); if (rc) { + memset(page_virt, 0, PAGE_CACHE_SIZE); rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); if (rc) { printk(KERN_DEBUG "Valid eCryptfs headers not found in " diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 542f625312f3..bc7115403f38 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -273,7 +273,7 @@ struct ecryptfs_crypt_stat { u32 flags; unsigned int file_version; size_t iv_bytes; - size_t num_header_bytes_at_front; + size_t metadata_size; size_t extent_size; /* Data extent size; default is 4096 */ size_t key_size; size_t extent_shift; @@ -464,6 +464,14 @@ struct ecryptfs_daemon { extern struct mutex ecryptfs_daemon_hash_mux; +static inline size_t +ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat) +{ + if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) + return 0; + return crypt_stat->metadata_size; +} + static inline struct ecryptfs_file_info * ecryptfs_file_to_private(struct file *file) { @@ -651,6 +659,9 @@ int ecryptfs_decrypt_page(struct page *page); int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry); int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry); int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); +void ecryptfs_write_crypt_stat_flags(char *page_virt, + struct ecryptfs_crypt_stat *crypt_stat, + size_t *written); int ecryptfs_read_and_validate_header_region(char *data, struct inode *ecryptfs_inode); int ecryptfs_read_and_validate_xattr_region(char *page_virt, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index d3362faf3852..e2d4418affac 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -324,6 +324,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, rc = ecryptfs_read_and_validate_header_region(page_virt, ecryptfs_dentry->d_inode); if (rc) { + memset(page_virt, 0, PAGE_CACHE_SIZE); rc = ecryptfs_read_and_validate_xattr_region(page_virt, ecryptfs_dentry); if (rc) { @@ -336,7 +337,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, ecryptfs_dentry->d_sb)->mount_crypt_stat; if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - file_size = (crypt_stat->num_header_bytes_at_front + file_size = (crypt_stat->metadata_size + i_size_read(lower_dentry->d_inode)); else file_size = i_size_read(lower_dentry->d_inode); @@ -388,9 +389,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); if (IS_ERR(lower_dentry)) { rc = PTR_ERR(lower_dentry); - printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " - "lower_dentry = [%s]\n", __func__, rc, - ecryptfs_dentry->d_name.name); + ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " + "[%d] on lower_dentry = [%s]\n", __func__, rc, + encrypted_and_encoded_name); goto out_d_drop; } if (lower_dentry->d_inode) @@ -417,9 +418,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); if (IS_ERR(lower_dentry)) { rc = PTR_ERR(lower_dentry); - printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " - "lower_dentry = [%s]\n", __func__, rc, - encrypted_and_encoded_name); + ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " + "[%d] on lower_dentry = [%s]\n", __func__, rc, + encrypted_and_encoded_name); goto out_d_drop; } lookup_and_interpose: @@ -456,8 +457,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); if (rc) goto out_lock; - fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); - fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); + fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); + fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); old_dentry->d_inode->i_nlink = ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; i_size_write(new_dentry->d_inode, file_size_save); @@ -648,38 +649,17 @@ out_lock: return rc; } -static int -ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, + size_t *bufsiz) { + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); char *lower_buf; - size_t lower_bufsiz; - struct dentry *lower_dentry; - struct ecryptfs_mount_crypt_stat *mount_crypt_stat; - char *plaintext_name; - size_t plaintext_name_size; + size_t lower_bufsiz = PATH_MAX; mm_segment_t old_fs; int rc; - lower_dentry = ecryptfs_dentry_to_lower(dentry); - if (!lower_dentry->d_inode->i_op->readlink) { - rc = -EINVAL; - goto out; - } - mount_crypt_stat = &ecryptfs_superblock_to_private( - dentry->d_sb)->mount_crypt_stat; - /* - * If the lower filename is encrypted, it will result in a significantly - * longer name. If needed, truncate the name after decode and decrypt. - */ - if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) - lower_bufsiz = PATH_MAX; - else - lower_bufsiz = bufsiz; - /* Released in this function */ lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); - if (lower_buf == NULL) { - printk(KERN_ERR "%s: Out of memory whilst attempting to " - "kmalloc [%zd] bytes\n", __func__, lower_bufsiz); + if (!lower_buf) { rc = -ENOMEM; goto out; } @@ -689,29 +669,31 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) (char __user *)lower_buf, lower_bufsiz); set_fs(old_fs); - if (rc >= 0) { - rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, - &plaintext_name_size, - dentry, lower_buf, - rc); - if (rc) { - printk(KERN_ERR "%s: Error attempting to decode and " - "decrypt filename; rc = [%d]\n", __func__, - rc); - goto out_free_lower_buf; - } - /* Check for bufsiz <= 0 done in sys_readlinkat() */ - rc = copy_to_user(buf, plaintext_name, - min((size_t) bufsiz, plaintext_name_size)); - if (rc) - rc = -EFAULT; - else - rc = plaintext_name_size; - kfree(plaintext_name); - fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); - } -out_free_lower_buf: + if (rc < 0) + goto out; + lower_bufsiz = rc; + rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, + lower_buf, lower_bufsiz); +out: kfree(lower_buf); + return rc; +} + +static int +ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +{ + char *kbuf; + size_t kbufsiz, copied; + int rc; + + rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz); + if (rc) + goto out; + copied = min_t(size_t, bufsiz, kbufsiz); + rc = copy_to_user(buf, kbuf, copied) ? -EFAULT : copied; + kfree(kbuf); + fsstack_copy_attr_atime(dentry->d_inode, + ecryptfs_dentry_to_lower(dentry)->d_inode); out: return rc; } @@ -769,7 +751,7 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat, { loff_t lower_size; - lower_size = crypt_stat->num_header_bytes_at_front; + lower_size = ecryptfs_lower_header_size(crypt_stat); if (upper_size != 0) { loff_t num_extents; @@ -1016,6 +998,28 @@ out: return rc; } +int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; + int rc = 0; + + mount_crypt_stat = &ecryptfs_superblock_to_private( + dentry->d_sb)->mount_crypt_stat; + generic_fillattr(dentry->d_inode, stat); + if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) { + char *target; + size_t targetsiz; + + rc = ecryptfs_readlink_lower(dentry, &target, &targetsiz); + if (!rc) { + kfree(target); + stat->size = targetsiz; + } + } + return rc; +} + int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -1040,7 +1044,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, lower_dentry = ecryptfs_dentry_to_lower(dentry); if (!lower_dentry->d_inode->i_op->setxattr) { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto out; } mutex_lock(&lower_dentry->d_inode->i_mutex); @@ -1058,7 +1062,7 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, int rc = 0; if (!lower_dentry->d_inode->i_op->getxattr) { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto out; } mutex_lock(&lower_dentry->d_inode->i_mutex); @@ -1085,7 +1089,7 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size) lower_dentry = ecryptfs_dentry_to_lower(dentry); if (!lower_dentry->d_inode->i_op->listxattr) { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto out; } mutex_lock(&lower_dentry->d_inode->i_mutex); @@ -1102,7 +1106,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name) lower_dentry = ecryptfs_dentry_to_lower(dentry); if (!lower_dentry->d_inode->i_op->removexattr) { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto out; } mutex_lock(&lower_dentry->d_inode->i_mutex); @@ -1133,6 +1137,7 @@ const struct inode_operations ecryptfs_symlink_iops = { .put_link = ecryptfs_put_link, .permission = ecryptfs_permission, .setattr = ecryptfs_setattr, + .getattr = ecryptfs_getattr_link, .setxattr = ecryptfs_setxattr, .getxattr = ecryptfs_getxattr, .listxattr = ecryptfs_listxattr, diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index d491237c98e7..2ee9a3a7b68c 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -83,6 +83,19 @@ out: return rc; } +static void strip_xattr_flag(char *page_virt, + struct ecryptfs_crypt_stat *crypt_stat) +{ + if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { + size_t written; + + crypt_stat->flags &= ~ECRYPTFS_METADATA_IN_XATTR; + ecryptfs_write_crypt_stat_flags(page_virt, crypt_stat, + &written); + crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; + } +} + /** * Header Extent: * Octets 0-7: Unencrypted file size (big-endian) @@ -98,19 +111,6 @@ out: * (big-endian) * Octet 26: Begin RFC 2440 authentication token packet set */ -static void set_header_info(char *page_virt, - struct ecryptfs_crypt_stat *crypt_stat) -{ - size_t written; - size_t save_num_header_bytes_at_front = - crypt_stat->num_header_bytes_at_front; - - crypt_stat->num_header_bytes_at_front = - ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; - ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written); - crypt_stat->num_header_bytes_at_front = - save_num_header_bytes_at_front; -} /** * ecryptfs_copy_up_encrypted_with_header @@ -136,8 +136,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, * num_extents_per_page) + extent_num_in_page); size_t num_header_extents_at_front = - (crypt_stat->num_header_bytes_at_front - / crypt_stat->extent_size); + (crypt_stat->metadata_size / crypt_stat->extent_size); if (view_extent_num < num_header_extents_at_front) { /* This is a header extent */ @@ -147,9 +146,14 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, memset(page_virt, 0, PAGE_CACHE_SIZE); /* TODO: Support more than one header extent */ if (view_extent_num == 0) { + size_t written; + rc = ecryptfs_read_xattr_region( page_virt, page->mapping->host); - set_header_info(page_virt, crypt_stat); + strip_xattr_flag(page_virt + 16, crypt_stat); + ecryptfs_write_header_metadata(page_virt + 20, + crypt_stat, + &written); } kunmap_atomic(page_virt, KM_USER0); flush_dcache_page(page); @@ -162,7 +166,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, /* This is an encrypted data extent */ loff_t lower_offset = ((view_extent_num * crypt_stat->extent_size) - - crypt_stat->num_header_bytes_at_front); + - crypt_stat->metadata_size); rc = ecryptfs_read_lower_page_segment( page, (lower_offset >> PAGE_CACHE_SHIFT), diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index fcef41c1d2cf..278743c7716a 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -86,7 +86,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) if (lower_dentry->d_inode) { fput(inode_info->lower_file); inode_info->lower_file = NULL; - d_drop(lower_dentry); } } ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 9dd126276c9f..ed9ba6fe04f5 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -61,7 +61,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &jfs_aops; } else { - inode->i_op = &jfs_symlink_inode_operations; + inode->i_op = &jfs_fast_symlink_inode_operations; /* * The inline data should be null-terminated, but * don't let on-disk corruption crash the kernel diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 6c4dfcbf3f55..9e2f6a721668 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -196,7 +196,7 @@ int dbMount(struct inode *ipbmap) bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); - bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); + bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight); bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); @@ -288,7 +288,7 @@ int dbSync(struct inode *ipbmap) dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); - dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); + dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight); dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); @@ -1441,7 +1441,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * tree index of this allocation group within the control page. */ agperlev = - (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; + (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth; ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); /* dmap control page trees fan-out by 4 and a single allocation @@ -1460,7 +1460,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * the subtree to find the leftmost leaf that describes this * free space. */ - for (k = bmp->db_agheigth; k > 0; k--) { + for (k = bmp->db_agheight; k > 0; k--) { for (n = 0, m = (ti << 2) + 1; n < 4; n++) { if (l2nb <= dcp->stree[m + n]) { ti = m + n; @@ -3607,7 +3607,7 @@ void dbFinalizeBmap(struct inode *ipbmap) } /* - * compute db_aglevel, db_agheigth, db_width, db_agstart: + * compute db_aglevel, db_agheight, db_width, db_agstart: * an ag is covered in aglevel dmapctl summary tree, * at agheight level height (from leaf) with agwidth number of nodes * each, which starts at agstart index node of the smmary tree node @@ -3616,9 +3616,9 @@ void dbFinalizeBmap(struct inode *ipbmap) bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); l2nl = bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); - bmp->db_agheigth = l2nl >> 1; - bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); - for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; + bmp->db_agheight = l2nl >> 1; + bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1)); + for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0; i--) { bmp->db_agstart += n; n <<= 2; diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 1a6eb41569bc..6dcb906c55d8 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -210,7 +210,7 @@ struct dbmap_disk { __le32 dn_maxag; /* 4: max active alloc group number */ __le32 dn_agpref; /* 4: preferred alloc group (hint) */ __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ - __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ + __le32 dn_agheight; /* 4: height in dmapctl of the AG */ __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ __le32 dn_agstart; /* 4: start tree index at AG height */ __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ @@ -229,7 +229,7 @@ struct dbmap { int dn_maxag; /* max active alloc group number */ int dn_agpref; /* preferred alloc group (hint) */ int dn_aglevel; /* dmapctl level holding the AG */ - int dn_agheigth; /* height in dmapctl of the AG */ + int dn_agheight; /* height in dmapctl of the AG */ int dn_agwidth; /* width in dmapctl of the AG */ int dn_agstart; /* start tree index at AG height */ int dn_agl2size; /* l2 num of blks per alloc group */ @@ -255,7 +255,7 @@ struct bmap { #define db_agsize db_bmap.dn_agsize #define db_agl2size db_bmap.dn_agl2size #define db_agwidth db_bmap.dn_agwidth -#define db_agheigth db_bmap.dn_agheigth +#define db_agheight db_bmap.dn_agheight #define db_agstart db_bmap.dn_agstart #define db_numag db_bmap.dn_numag #define db_maxlevel db_bmap.dn_maxlevel diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 79e2c79661df..9e6bda30a6e8 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -48,5 +48,6 @@ extern const struct file_operations jfs_dir_operations; extern const struct inode_operations jfs_file_inode_operations; extern const struct file_operations jfs_file_operations; extern const struct inode_operations jfs_symlink_inode_operations; +extern const struct inode_operations jfs_fast_symlink_inode_operations; extern const struct dentry_operations jfs_ci_dentry_operations; #endif /* _H_JFS_INODE */ diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 4a3e9f39c21d..a9cf8e8675be 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -956,7 +956,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, */ if (ssize <= IDATASIZE) { - ip->i_op = &jfs_symlink_inode_operations; + ip->i_op = &jfs_fast_symlink_inode_operations; i_fastsymlink = JFS_IP(ip)->i_inline; memcpy(i_fastsymlink, name, ssize); @@ -978,7 +978,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, else { jfs_info("jfs_symlink: allocate extent ip:0x%p", ip); - ip->i_op = &page_symlink_inode_operations; + ip->i_op = &jfs_symlink_inode_operations; ip->i_mapping->a_ops = &jfs_aops; /* diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 7f24a0bb08ca..1aba0039f1c9 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -81,6 +81,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) struct inode *iplist[1]; struct jfs_superblock *j_sb, *j_sb2; uint old_agsize; + int agsizechanged = 0; struct buffer_head *bh, *bh2; /* If the volume hasn't grown, get out now */ @@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) */ if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) goto error_out; + + agsizechanged |= (bmp->db_agsize != old_agsize); + /* * the map now has extended to cover additional nblocks: * dn_mapsize = oldMapsize + nblocks; @@ -432,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) * will correctly identify the new ag); */ /* if new AG size the same as old AG size, done! */ - if (bmp->db_agsize != old_agsize) { + if (agsizechanged) { if ((rc = diExtendFS(ipimap, ipbmap))) goto error_out; diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index 4af1a05aad0a..205b946d8e0d 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c @@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -const struct inode_operations jfs_symlink_inode_operations = { +const struct inode_operations jfs_fast_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = jfs_follow_link, + .setattr = jfs_setattr, + .setxattr = jfs_setxattr, + .getxattr = jfs_getxattr, + .listxattr = jfs_listxattr, + .removexattr = jfs_removexattr, +}; + +const struct inode_operations jfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .setattr = jfs_setattr, .setxattr = jfs_setxattr, .getxattr = jfs_getxattr, .listxattr = jfs_listxattr, diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c index 84e36f52fe95..76c242fbe1b0 100644 --- a/fs/logfs/gc.c +++ b/fs/logfs/gc.c @@ -459,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target) struct logfs_block *block; int round, progress, last_progress = 0; + /* + * Doing too many changes to the segfile at once would result + * in a large number of aliases. Write the journal before + * things get out of hand. + */ + if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES) + logfs_write_anchor(sb); + if (no_free_segments(sb) >= target && super->s_no_object_aliases < MAX_OBJ_ALIASES) return; diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 33bd260b8309..fb0a613f885b 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -389,7 +389,10 @@ static void journal_get_erase_count(struct logfs_area *area) static int journal_erase_segment(struct logfs_area *area) { struct super_block *sb = area->a_sb; - struct logfs_segment_header sh; + union { + struct logfs_segment_header sh; + unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)]; + } u; u64 ofs; int err; @@ -397,20 +400,21 @@ static int journal_erase_segment(struct logfs_area *area) if (err) return err; - sh.pad = 0; - sh.type = SEG_JOURNAL; - sh.level = 0; - sh.segno = cpu_to_be32(area->a_segno); - sh.ec = cpu_to_be32(area->a_erase_count); - sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); - sh.crc = logfs_crc32(&sh, sizeof(sh), 4); + memset(&u, 0, sizeof(u)); + u.sh.pad = 0; + u.sh.type = SEG_JOURNAL; + u.sh.level = 0; + u.sh.segno = cpu_to_be32(area->a_segno); + u.sh.ec = cpu_to_be32(area->a_erase_count); + u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); + u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4); /* This causes a bug in segment.c. Not yet. */ //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); ofs = dev_ofs(sb, area->a_segno, 0); - area->a_used_bytes = ALIGN(sizeof(sh), 16); - logfs_buf_write(area, ofs, &sh, sizeof(sh)); + area->a_used_bytes = sizeof(u); + logfs_buf_write(area, ofs, &u, sizeof(u)); return 0; } @@ -494,6 +498,8 @@ static void account_shadows(struct super_block *sb) btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); + btree_grim_visitor32(&tree->segment_map, 0, NULL); + tree->no_shadowed_segments = 0; if (li->li_block) { /* @@ -607,9 +613,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type, if (len == 0) return logfs_write_header(super, header, 0, type); + BUG_ON(len > sb->s_blocksize); compr_len = logfs_compress(buf, data, len, sb->s_blocksize); if (compr_len < 0 || type == JE_ANCHOR) { - BUG_ON(len > sb->s_blocksize); memcpy(data, buf, len); compr_len = len; compr = COMPR_NONE; @@ -661,6 +667,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type, if (ofs < 0) return ofs; logfs_buf_write(area, ofs, super->s_compressed_je, len); + BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES); super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); return 0; } diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index b84b0eec6024..0a3df1a0c936 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -257,10 +257,14 @@ struct logfs_shadow { * struct shadow_tree * @new: shadows where old_ofs==0, indexed by new_ofs * @old: shadows where old_ofs!=0, indexed by old_ofs + * @segment_map: bitfield of segments containing shadows + * @no_shadowed_segment: number of segments containing shadows */ struct shadow_tree { struct btree_head64 new; struct btree_head64 old; + struct btree_head32 segment_map; + int no_shadowed_segments; }; struct object_alias_item { @@ -305,13 +309,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix, level_t level, int child_no, __be64 val); struct logfs_block_ops { void (*write_block)(struct logfs_block *block); - gc_level_t (*block_level)(struct logfs_block *block); void (*free_block)(struct super_block *sb, struct logfs_block*block); int (*write_alias)(struct super_block *sb, struct logfs_block *block, write_alias_t *write_one_alias); }; +#define MAX_JOURNAL_ENTRIES 256 + struct logfs_super { struct mtd_info *s_mtd; /* underlying device */ struct block_device *s_bdev; /* underlying device */ @@ -378,7 +383,7 @@ struct logfs_super { u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ u64 s_last_version; struct logfs_area *s_journal_area; /* open journal segment */ - __be64 s_je_array[64]; + __be64 s_je_array[MAX_JOURNAL_ENTRIES]; int s_no_je; int s_sum_index; /* for the 12 summaries */ @@ -722,4 +727,10 @@ static inline struct logfs_area *get_area(struct super_block *sb, return logfs_super(sb)->s_area[(__force u8)gc_level]; } +static inline void logfs_mempool_destroy(mempool_t *pool) +{ + if (pool) + mempool_destroy(pool); +} + #endif diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index bff40253dfb2..3159db6958e5 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -430,25 +430,6 @@ static void inode_write_block(struct logfs_block *block) } } -static gc_level_t inode_block_level(struct logfs_block *block) -{ - BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER); - return GC_LEVEL(LOGFS_MAX_LEVELS); -} - -static gc_level_t indirect_block_level(struct logfs_block *block) -{ - struct page *page; - struct inode *inode; - u64 bix; - level_t level; - - page = block->page; - inode = page->mapping->host; - logfs_unpack_index(page->index, &bix, &level); - return expand_level(inode->i_ino, level); -} - /* * This silences a false, yet annoying gcc warning. I hate it when my editor * jumps into bitops.h each time I recompile this file. @@ -587,14 +568,12 @@ static void indirect_free_block(struct super_block *sb, static struct logfs_block_ops inode_block_ops = { .write_block = inode_write_block, - .block_level = inode_block_level, .free_block = inode_free_block, .write_alias = inode_write_alias, }; struct logfs_block_ops indirect_block_ops = { .write_block = indirect_write_block, - .block_level = indirect_block_level, .free_block = indirect_free_block, .write_alias = indirect_write_alias, }; @@ -1241,6 +1220,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow) mempool_free(shadow, super->s_shadow_pool); } +static void mark_segment(struct shadow_tree *tree, u32 segno) +{ + int err; + + if (!btree_lookup32(&tree->segment_map, segno)) { + err = btree_insert32(&tree->segment_map, segno, (void *)1, + GFP_NOFS); + BUG_ON(err); + tree->no_shadowed_segments++; + } +} + /** * fill_shadow_tree - Propagate shadow tree changes due to a write * @inode: Inode owning the page @@ -1288,6 +1279,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page, super->s_dirty_used_bytes += shadow->new_len; super->s_dirty_free_bytes += shadow->old_len; + mark_segment(tree, shadow->old_ofs >> super->s_segshift); + mark_segment(tree, shadow->new_ofs >> super->s_segshift); } } @@ -1845,19 +1838,37 @@ static int __logfs_truncate(struct inode *inode, u64 size) return logfs_truncate_direct(inode, size); } -int logfs_truncate(struct inode *inode, u64 size) +/* + * Truncate, by changing the segment file, can consume a fair amount + * of resources. So back off from time to time and do some GC. + * 8 or 2048 blocks should be well within safety limits even if + * every single block resided in a different segment. + */ +#define TRUNCATE_STEP (8 * 1024 * 1024) +int logfs_truncate(struct inode *inode, u64 target) { struct super_block *sb = inode->i_sb; - int err; + u64 size = i_size_read(inode); + int err = 0; - logfs_get_wblocks(sb, NULL, 1); - err = __logfs_truncate(inode, size); - if (!err) - err = __logfs_write_inode(inode, 0); - logfs_put_wblocks(sb, NULL, 1); + size = ALIGN(size, TRUNCATE_STEP); + while (size > target) { + if (size > TRUNCATE_STEP) + size -= TRUNCATE_STEP; + else + size = 0; + if (size < target) + size = target; + + logfs_get_wblocks(sb, NULL, 1); + err = __logfs_truncate(inode, target); + if (!err) + err = __logfs_write_inode(inode, 0); + logfs_put_wblocks(sb, NULL, 1); + } if (!err) - err = vmtruncate(inode, size); + err = vmtruncate(inode, target); /* I don't trust error recovery yet. */ WARN_ON(err); @@ -2251,8 +2262,6 @@ void logfs_cleanup_rw(struct super_block *sb) struct logfs_super *super = logfs_super(sb); destroy_meta_inode(super->s_segfile_inode); - if (super->s_block_pool) - mempool_destroy(super->s_block_pool); - if (super->s_shadow_pool) - mempool_destroy(super->s_shadow_pool); + logfs_mempool_destroy(super->s_block_pool); + logfs_mempool_destroy(super->s_shadow_pool); } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 801a3a141625..f77ce2b470ba 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -183,14 +183,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block, return 0; } -static gc_level_t btree_block_level(struct logfs_block *block) -{ - return expand_level(block->ino, block->level); -} - static struct logfs_block_ops btree_block_ops = { .write_block = btree_write_block, - .block_level = btree_block_level, .free_block = __free_block, .write_alias = btree_write_alias, }; @@ -919,7 +913,7 @@ err: for (i--; i >= 0; i--) free_area(super->s_area[i]); free_area(super->s_journal_area); - mempool_destroy(super->s_alias_pool); + logfs_mempool_destroy(super->s_alias_pool); return -ENOMEM; } diff --git a/fs/logfs/super.c b/fs/logfs/super.c index b60bfac3263c..5866ee6e1327 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -12,6 +12,7 @@ #include "logfs.h" #include <linux/bio.h> #include <linux/slab.h> +#include <linux/blkdev.h> #include <linux/mtd/mtd.h> #include <linux/statfs.h> #include <linux/buffer_head.h> @@ -137,6 +138,10 @@ static int logfs_sb_set(struct super_block *sb, void *_super) sb->s_fs_info = super; sb->s_mtd = super->s_mtd; sb->s_bdev = super->s_bdev; + if (sb->s_bdev) + sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; + if (sb->s_mtd) + sb->s_bdi = sb->s_mtd->backing_dev_info; return 0; } @@ -452,6 +457,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only) btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool); btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool); + btree_init_mempool32(&super->s_shadow_tree.segment_map, + super->s_btree_pool); ret = logfs_init_mapping(sb); if (ret) @@ -516,8 +523,8 @@ static void logfs_kill_sb(struct super_block *sb) if (super->s_erase_page) __free_page(super->s_erase_page); super->s_devops->put_device(sb); - mempool_destroy(super->s_btree_pool); - mempool_destroy(super->s_alias_pool); + logfs_mempool_destroy(super->s_btree_pool); + logfs_mempool_destroy(super->s_alias_pool); kfree(super); log_super("LogFS: Finished unmounting\n"); } diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index dad7fb247ddc..3e21b1e2ad3a 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -33,6 +33,14 @@ config PRINT_QUOTA_WARNING Note that this behavior is currently deprecated and may go away in future. Please use notification via netlink socket instead. +config QUOTA_DEBUG + bool "Additional quota sanity checks" + depends on QUOTA + default n + help + If you say Y here, quota subsystem will perform some additional + sanity checks of quota internal structures. If unsure, say N. + # Generic support for tree structured quota files. Selected when needed. config QUOTA_TREE tristate diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index a0a9405b202a..788b5802a7ce 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -80,8 +80,6 @@ #include <asm/uaccess.h> -#define __DQUOT_PARANOIA - /* * There are three quota SMP locks. dq_list_lock protects all lists with quotas * and quota formats, dqstats structure containing statistics about the lists @@ -695,7 +693,7 @@ void dqput(struct dquot *dquot) if (!dquot) return; -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG if (!atomic_read(&dquot->dq_count)) { printk("VFS: dqput: trying to free free dquot\n"); printk("VFS: device %s, dquot of %s %d\n", @@ -748,7 +746,7 @@ we_slept: goto we_slept; } atomic_dec(&dquot->dq_count); -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG /* sanity check */ BUG_ON(!list_empty(&dquot->dq_free)); #endif @@ -845,7 +843,7 @@ we_slept: dquot = NULL; goto out; } -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */ #endif out: @@ -874,7 +872,7 @@ static int dqinit_needed(struct inode *inode, int type) static void add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif @@ -882,7 +880,7 @@ static void add_dquot_ref(struct super_block *sb, int type) list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) continue; -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif @@ -907,7 +905,7 @@ static void add_dquot_ref(struct super_block *sb, int type) spin_unlock(&inode_lock); iput(old_inode); -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG if (reserved) { printk(KERN_WARNING "VFS (%s): Writes happened before quota" " was turned on thus quota information is probably " @@ -940,7 +938,7 @@ static int remove_inode_dquot_ref(struct inode *inode, int type, inode->i_dquot[type] = NULL; if (dquot) { if (dqput_blocks(dquot)) { -#ifdef __DQUOT_PARANOIA +#ifdef CONFIG_QUOTA_DEBUG if (atomic_read(&dquot->dq_count) != 1) printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count)); #endif diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 04a6ebc27b96..2d428b088cc8 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -6,6 +6,7 @@ {0x1002, 0x3150, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x3152, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x3154, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x3155, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x3E50, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \ {0x1002, 0x3E54, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \ {0x1002, 0x4136, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS100|RADEON_IS_IGP}, \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a3fd0f91d943..169d07758ee5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -54,7 +54,7 @@ extern struct kmem_cache *kvm_vcpu_cache; */ struct kvm_io_bus { int dev_count; -#define NR_IOBUS_DEVS 6 +#define NR_IOBUS_DEVS 200 struct kvm_io_device *devs[NR_IOBUS_DEVS]; }; @@ -119,6 +119,11 @@ struct kvm_memory_slot { int user_alloc; }; +static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) +{ + return ALIGN(memslot->npages, BITS_PER_LONG) / 8; +} + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 872a98e13d6a..07db2feb8572 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -101,10 +101,7 @@ extern struct lockdep_map rcu_sched_lock_map; # define rcu_read_release_sched() \ lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) -static inline int debug_lockdep_rcu_enabled(void) -{ - return likely(rcu_scheduler_active && debug_locks); -} +extern int debug_lockdep_rcu_enabled(void); /** * rcu_read_lock_held - might we be in RCU read-side critical section? @@ -195,12 +192,30 @@ static inline int rcu_read_lock_sched_held(void) /** * rcu_dereference_check - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * Do an rcu_dereference(), but check that the conditions under which the + * dereference will take place are correct. Typically the conditions indicate + * the various locking conditions that should be held at that point. The check + * should return true if the conditions are satisfied. + * + * For example: + * + * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || + * lockdep_is_held(&foo->lock)); * - * Do an rcu_dereference(), but check that the context is correct. - * For example, rcu_dereference_check(gp, rcu_read_lock_held()) to - * ensure that the rcu_dereference_check() executes within an RCU - * read-side critical section. It is also possible to check for - * locks being held, for example, by using lockdep_is_held(). + * could be used to indicate to lockdep that foo->bar may only be dereferenced + * if either the RCU read lock is held, or that the lock required to replace + * the bar struct at foo->bar is held. + * + * Note that the list of conditions may also include indications of when a lock + * need not be held, for example during initialisation or destruction of the + * target struct: + * + * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || + * lockdep_is_held(&foo->lock) || + * atomic_read(&foo->usage) == 0); */ #define rcu_dereference_check(p, c) \ ({ \ @@ -209,13 +224,45 @@ static inline int rcu_read_lock_sched_held(void) rcu_dereference_raw(p); \ }) +/** + * rcu_dereference_protected - fetch RCU pointer when updates prevented + * + * Return the value of the specified RCU-protected pointer, but omit + * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This + * is useful in cases where update-side locks prevent the value of the + * pointer from changing. Please note that this primitive does -not- + * prevent the compiler from repeating this reference or combining it + * with other references, so it should not be used without protection + * of appropriate locks. + */ +#define rcu_dereference_protected(p, c) \ + ({ \ + if (debug_lockdep_rcu_enabled() && !(c)) \ + lockdep_rcu_dereference(__FILE__, __LINE__); \ + (p); \ + }) + #else /* #ifdef CONFIG_PROVE_RCU */ #define rcu_dereference_check(p, c) rcu_dereference_raw(p) +#define rcu_dereference_protected(p, c) (p) #endif /* #else #ifdef CONFIG_PROVE_RCU */ /** + * rcu_access_pointer - fetch RCU pointer with no dereferencing + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. This may also be used in cases where update-side locks prevent + * the value of the pointer from changing, but rcu_dereference_protected() + * is a lighter-weight primitive for this use case. + */ +#define rcu_access_pointer(p) ACCESS_ONCE(p) + +/** * rcu_read_lock - mark the beginning of an RCU read-side critical section. * * When synchronize_rcu() is invoked on one CPU while other CPUs diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 63fe25433980..03a7ea1579f6 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -69,6 +69,13 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); #ifdef CONFIG_DEBUG_LOCK_ALLOC +int debug_lockdep_rcu_enabled(void) +{ + return rcu_scheduler_active && debug_locks && + current->lockdep_recursion == 0; +} +EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); + /** * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? * diff --git a/mm/rmap.c b/mm/rmap.c index 4bad3267537a..526704e8215d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -730,23 +730,28 @@ void page_move_anon_rmap(struct page *page, * @page: the page to add the mapping to * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped + * @exclusive: the page is exclusively owned by the current process */ static void __page_set_anon_rmap(struct page *page, - struct vm_area_struct *vma, unsigned long address) + struct vm_area_struct *vma, unsigned long address, int exclusive) { - struct anon_vma_chain *avc; - struct anon_vma *anon_vma; + struct anon_vma *anon_vma = vma->anon_vma; - BUG_ON(!vma->anon_vma); + BUG_ON(!anon_vma); /* - * We must use the _oldest_ possible anon_vma for the page mapping! + * If the page isn't exclusively mapped into this vma, + * we must use the _oldest_ possible anon_vma for the + * page mapping! * - * So take the last AVC chain entry in the vma, which is the deepest - * ancestor, and use the anon_vma from that. + * So take the last AVC chain entry in the vma, which is + * the deepest ancestor, and use the anon_vma from that. */ - avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma); - anon_vma = avc->anon_vma; + if (!exclusive) { + struct anon_vma_chain *avc; + avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma); + anon_vma = avc->anon_vma; + } anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; page->mapping = (struct address_space *) anon_vma; @@ -802,7 +807,7 @@ void page_add_anon_rmap(struct page *page, VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); if (first) - __page_set_anon_rmap(page, vma, address); + __page_set_anon_rmap(page, vma, address, 0); else __page_check_anon_rmap(page, vma, address); } @@ -824,7 +829,7 @@ void page_add_new_anon_rmap(struct page *page, SetPageSwapBacked(page); atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */ __inc_zone_page_state(page, NR_ANON_PAGES); - __page_set_anon_rmap(page, vma, address); + __page_set_anon_rmap(page, vma, address, 1); if (page_evictable(page, vma)) lru_cache_add_lru(page, LRU_ACTIVE_ANON); else diff --git a/net/core/dev.c b/net/core/dev.c index 1c8a0ce473a8..92584bfef09b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1989,8 +1989,12 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); - if (sk && sk->sk_dst_cache) - sk_tx_queue_set(sk, queue_index); + if (sk) { + struct dst_entry *dst = rcu_dereference(sk->sk_dst_cache); + + if (dst && skb_dst(skb) == dst) + sk_tx_queue_set(sk, queue_index); + } } } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 59a838795e3e..c98f115fb0fd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -209,7 +209,9 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) { struct node *ret = tnode_get_child(tn, i); - return rcu_dereference(ret); + return rcu_dereference_check(ret, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } static inline int tnode_child_length(const struct tnode *tn) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c65f18e0936e..d1bcc9f21d4f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -120,7 +120,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 16c4391f952b..65f9c379df38 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -108,7 +108,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index cc90363d7e7a..243946d4809d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2169,8 +2169,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: - if (!net_eq(sock_net(sk), &init_net)) - return -ENOIOCTLCMD; return inet_dgram_ops.ioctl(sock, cmd, arg); #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5a0cd194dce0..c82ae2492634 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -341,7 +341,11 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct kvm *kvm = mmu_notifier_to_kvm(mn); + int idx; + + idx = srcu_read_lock(&kvm->srcu); kvm_arch_flush_shadow(kvm); + srcu_read_unlock(&kvm->srcu, idx); } static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { @@ -648,7 +652,7 @@ skip_lpage: /* Allocate page dirty bitmap if needed */ if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { - unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; + unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new); new.dirty_bitmap = vmalloc(dirty_bytes); if (!new.dirty_bitmap) @@ -768,7 +772,7 @@ int kvm_get_dirty_log(struct kvm *kvm, { struct kvm_memory_slot *memslot; int r, i; - int n; + unsigned long n; unsigned long any = 0; r = -EINVAL; @@ -780,7 +784,7 @@ int kvm_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); for (i = 0; !any && i < n/sizeof(long); ++i) any = memslot->dirty_bitmap[i]; @@ -1186,10 +1190,13 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) memslot = gfn_to_memslot_unaliased(kvm, gfn); if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; + unsigned long *p = memslot->dirty_bitmap + + rel_gfn / BITS_PER_LONG; + int offset = rel_gfn % BITS_PER_LONG; /* avoid RMW */ - if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap)) - generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); + if (!generic_test_le_bit(offset, p)) + generic___set_le_bit(offset, p); } } |