diff options
124 files changed, 1266 insertions, 699 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index fce04d747a1a..38f44389ad99 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9074,7 +9074,7 @@ S: Maintained F: drivers/usb/mtu3/ MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES -M: Peter Senna Tschudin <peter.senna@collabora.com> +M: Peter Senna Tschudin <peter.senna@gmail.com> M: Martin Donnelly <martin.donnelly@ge.com> M: Martyn Welch <martyn.welch@collabora.co.uk> S: Maintained @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Merciless Moray # *DOCUMENTATION* diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 6e921754c8fc..c210a25dd6da 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1180,13 +1180,10 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, struct rusage32 __user *, ur) { - unsigned int status = 0; struct rusage r; - long err = kernel_wait4(pid, &status, options, &r); + long err = kernel_wait4(pid, ustatus, options, &r); if (err <= 0) return err; - if (put_user(status, ustatus)) - return -EFAULT; if (!ur) return err; if (put_tv_to_tv32(&ur->ru_utime, &r.ru_utime)) diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index 19a075aee19e..f14df0baf2ab 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -692,7 +692,7 @@ dsa,member = <0 0>; eeprom-length = <512>; interrupt-parent = <&gpio6>; - interrupts = <3 IRQ_TYPE_EDGE_FALLING>; + interrupts = <3 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index bdf73cbcec3a..e7c3c563ff8f 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -159,13 +159,7 @@ dais = <&mcbsp2_port>, <&mcbsp3_port>; }; -}; - -&dss { - status = "okay"; -}; -&gpio6 { pwm8: dmtimer-pwm-8 { pinctrl-names = "default"; pinctrl-0 = <&vibrator_direction_pin>; @@ -192,7 +186,10 @@ pwm-names = "enable", "direction"; direction-duty-cycle-ns = <10000000>; }; +}; +&dss { + status = "okay"; }; &dsi1 { diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 3b38c717008a..46bff1661836 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2278,17 +2278,15 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t DPRINT(("smpl_buf @%p\n", smpl_buf)); /* allocate vma */ - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(mm); if (!vma) { DPRINT(("Cannot allocate vma\n")); goto error_kmem; } - INIT_LIST_HEAD(&vma->anon_vma_chain); /* * partially initialize the vma for the sampling buffer */ - vma->vm_mm = mm; vma->vm_file = get_file(filp); vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ @@ -2346,7 +2344,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t return 0; error: - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); error_kmem: pfm_rvfree(smpl_buf, size); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 18278b448530..bdb14a369137 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -114,10 +114,8 @@ ia64_init_addr_space (void) * the problem. When the process attempts to write to the register backing store * for the first time, it will get a SEGFAULT in this case. */ - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(current->mm); if (vma) { - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = current->mm; vma->vm_start = current->thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; @@ -125,7 +123,7 @@ ia64_init_addr_space (void) down_write(¤t->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { up_write(¤t->mm->mmap_sem); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return; } up_write(¤t->mm->mmap_sem); @@ -133,10 +131,8 @@ ia64_init_addr_space (void) /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(current->mm); if (vma) { - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = current->mm; vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | @@ -144,7 +140,7 @@ ia64_init_addr_space (void) down_write(¤t->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { up_write(¤t->mm->mmap_sem); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return; } up_write(¤t->mm->mmap_sem); diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c index 10a405d593df..c782b10ddf50 100644 --- a/arch/mips/ath79/common.c +++ b/arch/mips/ath79/common.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init); void ath79_ddr_wb_flush(u32 reg) { - void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg; + void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4); /* Flush the DDR write buffer. */ __raw_writel(0x1, flush_reg); diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 9632436d74d7..c2e94cf5ecda 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -54,5 +54,5 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, phys_addr_t size = resource_size(rsrc); *start = fixup_bigphys_addr(rsrc->start, size); - *end = rsrc->start + size; + *end = rsrc->start + size - 1; } diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 2ea575cb3401..fb96206de317 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -243,6 +243,7 @@ endif cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) cpu-as-$(CONFIG_E200) += -Wa,-me200 +cpu-as-$(CONFIG_E500) += -Wa,-me500 cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 896efa559996..79d570cbf332 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm( extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa); + unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa); + unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); #endif diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index e734f6e45abc..689306118b48 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -144,7 +144,9 @@ power9_restore_additional_sprs: mtspr SPRN_MMCR1, r4 ld r3, STOP_MMCR2(r13) + ld r4, PACA_SPRG_VDSO(r13) mtspr SPRN_MMCR2, r3 + mtspr SPRN_SPRG3, r4 blr /* diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index d066e37551ec..8c456fa691a5 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -449,7 +449,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ return H_TOO_HARD; - if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) + if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) return H_HARDWARE; if (mm_iommu_mapped_inc(mem)) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 925fc316a104..5b298f5a1a14 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -279,7 +279,8 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (!mem) return H_TOO_HARD; - if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa))) + if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift, + &hpa))) return H_HARDWARE; pua = (void *) vmalloc_to_phys(pua); @@ -469,7 +470,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); if (mem) - prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0; + prereg = mm_iommu_ua_to_hpa_rm(mem, ua, + IOMMU_PAGE_SHIFT_4K, &tces) == 0; } if (!prereg) { diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index abb43646927a..a4ca57612558 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -19,6 +19,7 @@ #include <linux/hugetlb.h> #include <linux/swap.h> #include <asm/mmu_context.h> +#include <asm/pte-walk.h> static DEFINE_MUTEX(mem_list_mutex); @@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t { struct rcu_head rcu; unsigned long used; atomic64_t mapped; + unsigned int pageshift; u64 ua; /* userspace address */ u64 entries; /* number of entries in hpas[] */ u64 *hpas; /* vmalloc'ed */ @@ -125,6 +127,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; + unsigned int pageshift; + unsigned long flags; struct page *page = NULL; mutex_lock(&mem_list_mutex); @@ -159,6 +163,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, goto unlock_exit; } + /* + * For a starting point for a maximum page size calculation + * we use @ua and @entries natural alignment to allow IOMMU pages + * smaller than huge pages but still bigger than PAGE_SIZE. + */ + mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT)); mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0]))); if (!mem->hpas) { kfree(mem); @@ -199,6 +209,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, } } populate: + pageshift = PAGE_SHIFT; + if (PageCompound(page)) { + pte_t *pte; + struct page *head = compound_head(page); + unsigned int compshift = compound_order(head); + + local_irq_save(flags); /* disables as well */ + pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift); + local_irq_restore(flags); + + /* Double check it is still the same pinned page */ + if (pte && pte_page(*pte) == head && + pageshift == compshift) + pageshift = max_t(unsigned int, pageshift, + PAGE_SHIFT); + } + mem->pageshift = min(mem->pageshift, pageshift); mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; } @@ -349,7 +376,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, EXPORT_SYMBOL_GPL(mm_iommu_find); long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; u64 *va = &mem->hpas[entry]; @@ -357,6 +384,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + *hpa = *va | (ua & ~PAGE_MASK); return 0; @@ -364,7 +394,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; void *va = &mem->hpas[entry]; @@ -373,6 +403,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + pa = (void *) vmalloc_to_phys(va); if (!pa) return -EFAULT; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 380cbf9a40d9..c0a9bcd28356 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -286,6 +286,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u64 imm64; u8 *func; u32 true_cond; + u32 tmp_idx; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -637,11 +638,7 @@ emit_clear: case BPF_STX | BPF_XADD | BPF_W: /* Get EA into TMP_REG_1 */ PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); - /* error if EA is not word-aligned */ - PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x03); - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + 12); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_JMP(exit_addr); + tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); /* add value from src_reg into this */ @@ -649,32 +646,16 @@ emit_clear: /* store result back */ PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); /* we're done if this succeeded */ - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); - /* otherwise, let's try once more */ - PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); - PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); - /* exit if the store was not successful */ - PPC_LI(b2p[BPF_REG_0], 0); - PPC_BCC(COND_NE, exit_addr); + PPC_BCC_SHORT(COND_NE, tmp_idx); break; /* *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); - /* error if EA is not doubleword-aligned */ - PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x07); - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (3*4)); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_JMP(exit_addr); - PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); - PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); + tmp_idx = ctx->idx * 4; PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_BCC(COND_NE, exit_addr); + PPC_BCC_SHORT(COND_NE, tmp_idx); break; /* diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 47166ad2a669..196978733e64 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2734,7 +2734,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, { int nr, dotted; unsigned long first_adr; - unsigned long inst, last_inst = 0; + unsigned int inst, last_inst = 0; unsigned char val[4]; dotted = 0; @@ -2758,7 +2758,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %.8lx", adr, inst); + printf(REG" %.8x", adr, inst); printf("\t"); dump_func(inst, adr); printf("\n"); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e44bb2b2873e..8a1863d9ed53 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -140,7 +140,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX - select HAVE_GCC_PLUGINS + select HAVE_GCC_PLUGINS if BROKEN select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f1dbb4ee19d7..887d3a7bb646 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -63,7 +63,7 @@ config X86 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 - select ARCH_HAS_UACCESS_MCSAFE if X86_64 + select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8a10a045b57b..8cf03f101938 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -408,9 +408,11 @@ static int alloc_bts_buffer(int cpu) ds->bts_buffer_base = (unsigned long) cea; ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); ds->bts_index = ds->bts_buffer_base; - max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE); - ds->bts_absolute_maximum = ds->bts_buffer_base + max; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16); + max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; + ds->bts_absolute_maximum = ds->bts_buffer_base + + max * BTS_RECORD_SIZE; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - + (max / 16) * BTS_RECORD_SIZE; return 0; } diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index c356098b6fb9..4d4015ddcf26 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,8 +7,6 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H -#include <asm/nospec-branch.h> - #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -34,7 +32,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -47,7 +44,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -60,7 +56,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -73,7 +68,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 62acb613114b..a9d637bc301d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -52,7 +52,12 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len) unsigned long ret; __uaccess_begin(); - ret = memcpy_mcsafe(to, from, len); + /* + * Note, __memcpy_mcsafe() is explicitly used since it can + * handle exceptions / faults. memcpy_mcsafe() may fall back to + * memcpy() which lacks this handling. + */ + ret = __memcpy_mcsafe(to, from, len); __uaccess_end(); return ret; } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 5d0de79fdab0..ec00d1ff5098 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -240,6 +240,7 @@ #include <asm/olpc.h> #include <asm/paravirt.h> #include <asm/reboot.h> +#include <asm/nospec-branch.h> #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -614,11 +615,13 @@ static long __apm_bios_call(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; apm_bios_call_asm(call->func, call->ebx, call->ecx, &call->eax, &call->ebx, &call->ecx, &call->edx, &call->esi); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); @@ -690,10 +693,12 @@ static long __apm_bios_call_simple(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, &call->eax); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c102ad51025e..8c50754c09c1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2165,9 +2165,6 @@ static ssize_t store_int_with_restart(struct device *s, if (check_interval == old_check_interval) return ret; - if (check_interval < 1) - check_interval = 1; - mutex_lock(&mce_sysfs_mutex); mce_restart(); mutex_unlock(&mce_sysfs_mutex); diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 50c73c0a20b9..d638c0fb3418 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -553,24 +553,13 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, /* Clone the lessor file to create a new file for us */ DRM_DEBUG_LEASE("Allocating lease file\n"); - path_get(&lessor_file->f_path); - lessee_file = alloc_file(&lessor_file->f_path, - lessor_file->f_mode, - fops_get(lessor_file->f_inode->i_fop)); - + lessee_file = filp_clone_open(lessor_file); if (IS_ERR(lessee_file)) { ret = PTR_ERR(lessee_file); goto out_lessee; } - /* Initialize the new file for DRM */ - DRM_DEBUG_LEASE("Initializing the file with %p\n", lessee_file->f_op->open); - ret = lessee_file->f_op->open(lessee_file->f_inode, lessee_file); - if (ret) - goto out_lessee_file; - lessee_priv = lessee_file->private_data; - /* Change the file to a master one */ drm_master_put(&lessee_priv->master); lessee_priv->master = lessee; @@ -588,9 +577,6 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n"); return 0; -out_lessee_file: - fput(lessee_file); - out_lessee: drm_master_put(&lessee); diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 753b1a698fc4..6b16946f9b05 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -103,15 +103,15 @@ static struct file *cxl_getfile(const char *name, d_instantiate(path.dentry, inode); file = alloc_file(&path, OPEN_FMODE(flags), fops); - if (IS_ERR(file)) - goto err_dput; + if (IS_ERR(file)) { + path_put(&path); + goto err_fs; + } file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = priv; return file; -err_dput: - path_put(&path); err_inode: iput(inode); err_fs: diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 98663c50ded0..4d5d01cb8141 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -743,15 +743,20 @@ const struct bond_option *bond_opt_get(unsigned int option) static int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval) { - if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) { - netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n", - newval->string); - /* disable arp monitoring */ - bond->params.arp_interval = 0; - /* set miimon to default value */ - bond->params.miimon = BOND_DEFAULT_MIIMON; - netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n", - bond->params.miimon); + if (!bond_mode_uses_arp(newval->value)) { + if (bond->params.arp_interval) { + netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n", + newval->string); + /* disable arp monitoring */ + bond->params.arp_interval = 0; + } + + if (!bond->params.miimon) { + /* set miimon to default value */ + bond->params.miimon = BOND_DEFAULT_MIIMON; + netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n", + bond->params.miimon); + } } if (newval->value == BOND_MODE_ALB) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index b397a33f3d32..9b449400376b 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -634,10 +634,12 @@ static int m_can_clk_start(struct m_can_priv *priv) int err; err = pm_runtime_get_sync(priv->device); - if (err) + if (err < 0) { pm_runtime_put_noidle(priv->device); + return err; + } - return err; + return 0; } static void m_can_clk_stop(struct m_can_priv *priv) @@ -1109,7 +1111,8 @@ static void m_can_chip_config(struct net_device *dev) } else { /* Version 3.1.x or 3.2.x */ - cccr &= ~(CCCR_TEST | CCCR_MON | CCCR_BRSE | CCCR_FDOE); + cccr &= ~(CCCR_TEST | CCCR_MON | CCCR_BRSE | CCCR_FDOE | + CCCR_NISO); /* Only 3.2.x has NISO Bit implemented */ if (priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO) @@ -1642,8 +1645,6 @@ static int m_can_plat_probe(struct platform_device *pdev) priv->can.clock.freq = clk_get_rate(cclk); priv->mram_base = mram_addr; - m_can_of_parse_mram(priv, mram_config_vals); - platform_set_drvdata(pdev, dev); SET_NETDEV_DEV(dev, &pdev->dev); @@ -1666,6 +1667,8 @@ static int m_can_plat_probe(struct platform_device *pdev) goto clk_disable; } + m_can_of_parse_mram(priv, mram_config_vals); + devm_can_led_init(dev); of_can_transceiver(dev); @@ -1687,8 +1690,6 @@ failed_ret: return ret; } -/* TODO: runtime PM with power down or sleep mode */ - static __maybe_unused int m_can_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); @@ -1715,8 +1716,6 @@ static __maybe_unused int m_can_resume(struct device *dev) pinctrl_pm_select_default_state(dev); - m_can_init_ram(priv); - priv->can.state = CAN_STATE_ERROR_ACTIVE; if (netif_running(ndev)) { @@ -1726,6 +1725,7 @@ static __maybe_unused int m_can_resume(struct device *dev) if (ret) return ret; + m_can_init_ram(priv); m_can_start(ndev); netif_device_attach(ndev); netif_start_queue(ndev); diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index c7427bdd3a4b..2949a381a94d 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -86,6 +86,11 @@ static u32 mpc52xx_can_get_clock(struct platform_device *ofdev, return 0; } cdm = of_iomap(np_cdm, 0); + if (!cdm) { + of_node_put(np_cdm); + dev_err(&ofdev->dev, "can't map clock node!\n"); + return 0; + } if (in_8(&cdm->ipb_clk_sel) & 0x1) freq *= 2; diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c index b9e28578bc7b..455a3797a200 100644 --- a/drivers/net/can/peak_canfd/peak_pciefd_main.c +++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c @@ -58,6 +58,10 @@ MODULE_LICENSE("GPL v2"); #define PCIEFD_REG_SYS_VER1 0x0040 /* version reg #1 */ #define PCIEFD_REG_SYS_VER2 0x0044 /* version reg #2 */ +#define PCIEFD_FW_VERSION(x, y, z) (((u32)(x) << 24) | \ + ((u32)(y) << 16) | \ + ((u32)(z) << 8)) + /* System Control Registers Bits */ #define PCIEFD_SYS_CTL_TS_RST 0x00000001 /* timestamp clock */ #define PCIEFD_SYS_CTL_CLK_EN 0x00000002 /* system clock */ @@ -782,6 +786,21 @@ static int peak_pciefd_probe(struct pci_dev *pdev, "%ux CAN-FD PCAN-PCIe FPGA v%u.%u.%u:\n", can_count, hw_ver_major, hw_ver_minor, hw_ver_sub); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + /* FW < v3.3.0 DMA logic doesn't handle correctly the mix of 32-bit and + * 64-bit logical addresses: this workaround forces usage of 32-bit + * DMA addresses only when such a fw is detected. + */ + if (PCIEFD_FW_VERSION(hw_ver_major, hw_ver_minor, hw_ver_sub) < + PCIEFD_FW_VERSION(3, 3, 0)) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + dev_warn(&pdev->dev, + "warning: can't set DMA mask %llxh (err %d)\n", + DMA_BIT_MASK(32), err); + } +#endif + /* stop system clock */ pciefd_sys_writereg(pciefd, PCIEFD_SYS_CTL_CLK_EN, PCIEFD_REG_SYS_CTL_CLR); diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 89aec07c225f..5a24039733ef 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -2,6 +2,7 @@ * * Copyright (C) 2012 - 2014 Xilinx, Inc. * Copyright (C) 2009 PetaLogix. All rights reserved. + * Copyright (C) 2017 Sandvik Mining and Construction Oy * * Description: * This driver is developed for Axi CAN IP and for Zynq CANPS Controller. @@ -25,8 +26,10 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/of.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/skbuff.h> +#include <linux/spinlock.h> #include <linux/string.h> #include <linux/types.h> #include <linux/can/dev.h> @@ -101,7 +104,7 @@ enum xcan_reg { #define XCAN_INTR_ALL (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\ XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \ XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \ - XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK) + XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK) /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */ @@ -118,6 +121,7 @@ enum xcan_reg { /** * struct xcan_priv - This definition define CAN driver instance * @can: CAN private data structure. + * @tx_lock: Lock for synchronizing TX interrupt handling * @tx_head: Tx CAN packets ready to send on the queue * @tx_tail: Tx CAN packets successfully sended on the queue * @tx_max: Maximum number packets the driver can send @@ -132,6 +136,7 @@ enum xcan_reg { */ struct xcan_priv { struct can_priv can; + spinlock_t tx_lock; unsigned int tx_head; unsigned int tx_tail; unsigned int tx_max; @@ -159,6 +164,11 @@ static const struct can_bittiming_const xcan_bittiming_const = { .brp_inc = 1, }; +#define XCAN_CAP_WATERMARK 0x0001 +struct xcan_devtype_data { + unsigned int caps; +}; + /** * xcan_write_reg_le - Write a value to the device register little endian * @priv: Driver private data structure @@ -238,6 +248,10 @@ static int set_reset_mode(struct net_device *ndev) usleep_range(500, 10000); } + /* reset clears FIFOs */ + priv->tx_head = 0; + priv->tx_tail = 0; + return 0; } @@ -392,6 +406,7 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct net_device_stats *stats = &ndev->stats; struct can_frame *cf = (struct can_frame *)skb->data; u32 id, dlc, data[2] = {0, 0}; + unsigned long flags; if (can_dropped_invalid_skb(ndev, skb)) return NETDEV_TX_OK; @@ -439,6 +454,9 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) data[1] = be32_to_cpup((__be32 *)(cf->data + 4)); can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max); + + spin_lock_irqsave(&priv->tx_lock, flags); + priv->tx_head++; /* Write the Frame to Xilinx CAN TX FIFO */ @@ -454,10 +472,16 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) stats->tx_bytes += cf->can_dlc; } + /* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */ + if (priv->tx_max > 1) + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXFEMP_MASK); + /* Check if the TX buffer is full */ if ((priv->tx_head - priv->tx_tail) == priv->tx_max) netif_stop_queue(ndev); + spin_unlock_irqrestore(&priv->tx_lock, flags); + return NETDEV_TX_OK; } @@ -530,6 +554,123 @@ static int xcan_rx(struct net_device *ndev) } /** + * xcan_current_error_state - Get current error state from HW + * @ndev: Pointer to net_device structure + * + * Checks the current CAN error state from the HW. Note that this + * only checks for ERROR_PASSIVE and ERROR_WARNING. + * + * Return: + * ERROR_PASSIVE or ERROR_WARNING if either is active, ERROR_ACTIVE + * otherwise. + */ +static enum can_state xcan_current_error_state(struct net_device *ndev) +{ + struct xcan_priv *priv = netdev_priv(ndev); + u32 status = priv->read_reg(priv, XCAN_SR_OFFSET); + + if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) + return CAN_STATE_ERROR_PASSIVE; + else if (status & XCAN_SR_ERRWRN_MASK) + return CAN_STATE_ERROR_WARNING; + else + return CAN_STATE_ERROR_ACTIVE; +} + +/** + * xcan_set_error_state - Set new CAN error state + * @ndev: Pointer to net_device structure + * @new_state: The new CAN state to be set + * @cf: Error frame to be populated or NULL + * + * Set new CAN error state for the device, updating statistics and + * populating the error frame if given. + */ +static void xcan_set_error_state(struct net_device *ndev, + enum can_state new_state, + struct can_frame *cf) +{ + struct xcan_priv *priv = netdev_priv(ndev); + u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET); + u32 txerr = ecr & XCAN_ECR_TEC_MASK; + u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT; + + priv->can.state = new_state; + + if (cf) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[6] = txerr; + cf->data[7] = rxerr; + } + + switch (new_state) { + case CAN_STATE_ERROR_PASSIVE: + priv->can.can_stats.error_passive++; + if (cf) + cf->data[1] = (rxerr > 127) ? + CAN_ERR_CRTL_RX_PASSIVE : + CAN_ERR_CRTL_TX_PASSIVE; + break; + case CAN_STATE_ERROR_WARNING: + priv->can.can_stats.error_warning++; + if (cf) + cf->data[1] |= (txerr > rxerr) ? + CAN_ERR_CRTL_TX_WARNING : + CAN_ERR_CRTL_RX_WARNING; + break; + case CAN_STATE_ERROR_ACTIVE: + if (cf) + cf->data[1] |= CAN_ERR_CRTL_ACTIVE; + break; + default: + /* non-ERROR states are handled elsewhere */ + WARN_ON(1); + break; + } +} + +/** + * xcan_update_error_state_after_rxtx - Update CAN error state after RX/TX + * @ndev: Pointer to net_device structure + * + * If the device is in a ERROR-WARNING or ERROR-PASSIVE state, check if + * the performed RX/TX has caused it to drop to a lesser state and set + * the interface state accordingly. + */ +static void xcan_update_error_state_after_rxtx(struct net_device *ndev) +{ + struct xcan_priv *priv = netdev_priv(ndev); + enum can_state old_state = priv->can.state; + enum can_state new_state; + + /* changing error state due to successful frame RX/TX can only + * occur from these states + */ + if (old_state != CAN_STATE_ERROR_WARNING && + old_state != CAN_STATE_ERROR_PASSIVE) + return; + + new_state = xcan_current_error_state(ndev); + + if (new_state != old_state) { + struct sk_buff *skb; + struct can_frame *cf; + + skb = alloc_can_err_skb(ndev, &cf); + + xcan_set_error_state(ndev, new_state, skb ? cf : NULL); + + if (skb) { + struct net_device_stats *stats = &ndev->stats; + + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + netif_rx(skb); + } + } +} + +/** * xcan_err_interrupt - error frame Isr * @ndev: net_device pointer * @isr: interrupt status register value @@ -544,16 +685,12 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) struct net_device_stats *stats = &ndev->stats; struct can_frame *cf; struct sk_buff *skb; - u32 err_status, status, txerr = 0, rxerr = 0; + u32 err_status; skb = alloc_can_err_skb(ndev, &cf); err_status = priv->read_reg(priv, XCAN_ESR_OFFSET); priv->write_reg(priv, XCAN_ESR_OFFSET, err_status); - txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK; - rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) & - XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT); - status = priv->read_reg(priv, XCAN_SR_OFFSET); if (isr & XCAN_IXR_BSOFF_MASK) { priv->can.state = CAN_STATE_BUS_OFF; @@ -563,28 +700,10 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) can_bus_off(ndev); if (skb) cf->can_id |= CAN_ERR_BUSOFF; - } else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) { - priv->can.state = CAN_STATE_ERROR_PASSIVE; - priv->can.can_stats.error_passive++; - if (skb) { - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (rxerr > 127) ? - CAN_ERR_CRTL_RX_PASSIVE : - CAN_ERR_CRTL_TX_PASSIVE; - cf->data[6] = txerr; - cf->data[7] = rxerr; - } - } else if (status & XCAN_SR_ERRWRN_MASK) { - priv->can.state = CAN_STATE_ERROR_WARNING; - priv->can.can_stats.error_warning++; - if (skb) { - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] |= (txerr > rxerr) ? - CAN_ERR_CRTL_TX_WARNING : - CAN_ERR_CRTL_RX_WARNING; - cf->data[6] = txerr; - cf->data[7] = rxerr; - } + } else { + enum can_state new_state = xcan_current_error_state(ndev); + + xcan_set_error_state(ndev, new_state, skb ? cf : NULL); } /* Check for Arbitration lost interrupt */ @@ -600,7 +719,6 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) if (isr & XCAN_IXR_RXOFLW_MASK) { stats->rx_over_errors++; stats->rx_errors++; - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); if (skb) { cf->can_id |= CAN_ERR_CRTL; cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW; @@ -709,26 +827,20 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) { - if (isr & XCAN_IXR_RXOK_MASK) { - priv->write_reg(priv, XCAN_ICR_OFFSET, - XCAN_IXR_RXOK_MASK); - work_done += xcan_rx(ndev); - } else { - priv->write_reg(priv, XCAN_ICR_OFFSET, - XCAN_IXR_RXNEMP_MASK); - break; - } + work_done += xcan_rx(ndev); priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK); isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } - if (work_done) + if (work_done) { can_led_event(ndev, CAN_LED_EVENT_RX); + xcan_update_error_state_after_rxtx(ndev); + } if (work_done < quota) { napi_complete_done(napi, work_done); ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK); + ier |= XCAN_IXR_RXNEMP_MASK; priv->write_reg(priv, XCAN_IER_OFFSET, ier); } return work_done; @@ -743,18 +855,71 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr) { struct xcan_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; + unsigned int frames_in_fifo; + int frames_sent = 1; /* TXOK => at least 1 frame was sent */ + unsigned long flags; + int retries = 0; + + /* Synchronize with xmit as we need to know the exact number + * of frames in the FIFO to stay in sync due to the TXFEMP + * handling. + * This also prevents a race between netif_wake_queue() and + * netif_stop_queue(). + */ + spin_lock_irqsave(&priv->tx_lock, flags); + + frames_in_fifo = priv->tx_head - priv->tx_tail; + + if (WARN_ON_ONCE(frames_in_fifo == 0)) { + /* clear TXOK anyway to avoid getting back here */ + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + spin_unlock_irqrestore(&priv->tx_lock, flags); + return; + } + + /* Check if 2 frames were sent (TXOK only means that at least 1 + * frame was sent). + */ + if (frames_in_fifo > 1) { + WARN_ON(frames_in_fifo > priv->tx_max); + + /* Synchronize TXOK and isr so that after the loop: + * (1) isr variable is up-to-date at least up to TXOK clear + * time. This avoids us clearing a TXOK of a second frame + * but not noticing that the FIFO is now empty and thus + * marking only a single frame as sent. + * (2) No TXOK is left. Having one could mean leaving a + * stray TXOK as we might process the associated frame + * via TXFEMP handling as we read TXFEMP *after* TXOK + * clear to satisfy (1). + */ + while ((isr & XCAN_IXR_TXOK_MASK) && !WARN_ON(++retries == 100)) { + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + isr = priv->read_reg(priv, XCAN_ISR_OFFSET); + } - while ((priv->tx_head - priv->tx_tail > 0) && - (isr & XCAN_IXR_TXOK_MASK)) { + if (isr & XCAN_IXR_TXFEMP_MASK) { + /* nothing in FIFO anymore */ + frames_sent = frames_in_fifo; + } + } else { + /* single frame in fifo, just clear TXOK */ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + } + + while (frames_sent--) { can_get_echo_skb(ndev, priv->tx_tail % priv->tx_max); priv->tx_tail++; stats->tx_packets++; - isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } - can_led_event(ndev, CAN_LED_EVENT_TX); + netif_wake_queue(ndev); + + spin_unlock_irqrestore(&priv->tx_lock, flags); + + can_led_event(ndev, CAN_LED_EVENT_TX); + xcan_update_error_state_after_rxtx(ndev); } /** @@ -773,6 +938,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) struct net_device *ndev = (struct net_device *)dev_id; struct xcan_priv *priv = netdev_priv(ndev); u32 isr, ier; + u32 isr_errors; /* Get the interrupt status from Xilinx CAN */ isr = priv->read_reg(priv, XCAN_ISR_OFFSET); @@ -791,18 +957,17 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) xcan_tx_interrupt(ndev, isr); /* Check for the type of error interrupt and Processing it */ - if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | - XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) { - priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK | - XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK | - XCAN_IXR_ARBLST_MASK)); + isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | + XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK); + if (isr_errors) { + priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors); xcan_err_interrupt(ndev, isr); } /* Check for the type of receive interrupt and Processing it */ - if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) { + if (isr & XCAN_IXR_RXNEMP_MASK) { ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK); + ier &= ~XCAN_IXR_RXNEMP_MASK; priv->write_reg(priv, XCAN_IER_OFFSET, ier); napi_schedule(&priv->napi); } @@ -819,13 +984,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) static void xcan_chip_stop(struct net_device *ndev) { struct xcan_priv *priv = netdev_priv(ndev); - u32 ier; /* Disable interrupts and leave the can in configuration mode */ - ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier &= ~XCAN_INTR_ALL; - priv->write_reg(priv, XCAN_IER_OFFSET, ier); - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); + set_reset_mode(ndev); priv->can.state = CAN_STATE_STOPPED; } @@ -958,10 +1119,15 @@ static const struct net_device_ops xcan_netdev_ops = { */ static int __maybe_unused xcan_suspend(struct device *dev) { - if (!device_may_wakeup(dev)) - return pm_runtime_force_suspend(dev); + struct net_device *ndev = dev_get_drvdata(dev); - return 0; + if (netif_running(ndev)) { + netif_stop_queue(ndev); + netif_device_detach(ndev); + xcan_chip_stop(ndev); + } + + return pm_runtime_force_suspend(dev); } /** @@ -973,11 +1139,27 @@ static int __maybe_unused xcan_suspend(struct device *dev) */ static int __maybe_unused xcan_resume(struct device *dev) { - if (!device_may_wakeup(dev)) - return pm_runtime_force_resume(dev); + struct net_device *ndev = dev_get_drvdata(dev); + int ret; - return 0; + ret = pm_runtime_force_resume(dev); + if (ret) { + dev_err(dev, "pm_runtime_force_resume failed on resume\n"); + return ret; + } + + if (netif_running(ndev)) { + ret = xcan_chip_start(ndev); + if (ret) { + dev_err(dev, "xcan_chip_start failed on resume\n"); + return ret; + } + + netif_device_attach(ndev); + netif_start_queue(ndev); + } + return 0; } /** @@ -992,14 +1174,6 @@ static int __maybe_unused xcan_runtime_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct xcan_priv *priv = netdev_priv(ndev); - if (netif_running(ndev)) { - netif_stop_queue(ndev); - netif_device_detach(ndev); - } - - priv->write_reg(priv, XCAN_MSR_OFFSET, XCAN_MSR_SLEEP_MASK); - priv->can.state = CAN_STATE_SLEEPING; - clk_disable_unprepare(priv->bus_clk); clk_disable_unprepare(priv->can_clk); @@ -1018,7 +1192,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct xcan_priv *priv = netdev_priv(ndev); int ret; - u32 isr, status; ret = clk_prepare_enable(priv->bus_clk); if (ret) { @@ -1032,27 +1205,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev) return ret; } - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); - isr = priv->read_reg(priv, XCAN_ISR_OFFSET); - status = priv->read_reg(priv, XCAN_SR_OFFSET); - - if (netif_running(ndev)) { - if (isr & XCAN_IXR_BSOFF_MASK) { - priv->can.state = CAN_STATE_BUS_OFF; - priv->write_reg(priv, XCAN_SRR_OFFSET, - XCAN_SRR_RESET_MASK); - } else if ((status & XCAN_SR_ESTAT_MASK) == - XCAN_SR_ESTAT_MASK) { - priv->can.state = CAN_STATE_ERROR_PASSIVE; - } else if (status & XCAN_SR_ERRWRN_MASK) { - priv->can.state = CAN_STATE_ERROR_WARNING; - } else { - priv->can.state = CAN_STATE_ERROR_ACTIVE; - } - netif_device_attach(ndev); - netif_start_queue(ndev); - } - return 0; } @@ -1061,6 +1213,18 @@ static const struct dev_pm_ops xcan_dev_pm_ops = { SET_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL) }; +static const struct xcan_devtype_data xcan_zynq_data = { + .caps = XCAN_CAP_WATERMARK, +}; + +/* Match table for OF platform binding */ +static const struct of_device_id xcan_of_match[] = { + { .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data }, + { .compatible = "xlnx,axi-can-1.00.a", }, + { /* end of list */ }, +}; +MODULE_DEVICE_TABLE(of, xcan_of_match); + /** * xcan_probe - Platform registration call * @pdev: Handle to the platform device structure @@ -1075,8 +1239,10 @@ static int xcan_probe(struct platform_device *pdev) struct resource *res; /* IO mem resources */ struct net_device *ndev; struct xcan_priv *priv; + const struct of_device_id *of_id; + int caps = 0; void __iomem *addr; - int ret, rx_max, tx_max; + int ret, rx_max, tx_max, tx_fifo_depth; /* Get the virtual base address for the device */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1086,7 +1252,8 @@ static int xcan_probe(struct platform_device *pdev) goto err; } - ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max); + ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", + &tx_fifo_depth); if (ret < 0) goto err; @@ -1094,6 +1261,30 @@ static int xcan_probe(struct platform_device *pdev) if (ret < 0) goto err; + of_id = of_match_device(xcan_of_match, &pdev->dev); + if (of_id) { + const struct xcan_devtype_data *devtype_data = of_id->data; + + if (devtype_data) + caps = devtype_data->caps; + } + + /* There is no way to directly figure out how many frames have been + * sent when the TXOK interrupt is processed. If watermark programming + * is supported, we can have 2 frames in the FIFO and use TXFEMP + * to determine if 1 or 2 frames have been sent. + * Theoretically we should be able to use TXFWMEMP to determine up + * to 3 frames, but it seems that after putting a second frame in the + * FIFO, with watermark at 2 frames, it can happen that TXFWMEMP (less + * than 2 frames in FIFO) is set anyway with no TXOK (a frame was + * sent), which is not a sensible state - possibly TXFWMEMP is not + * completely synchronized with the rest of the bits? + */ + if (caps & XCAN_CAP_WATERMARK) + tx_max = min(tx_fifo_depth, 2); + else + tx_max = 1; + /* Create a CAN device instance */ ndev = alloc_candev(sizeof(struct xcan_priv), tx_max); if (!ndev) @@ -1108,6 +1299,7 @@ static int xcan_probe(struct platform_device *pdev) CAN_CTRLMODE_BERR_REPORTING; priv->reg_base = addr; priv->tx_max = tx_max; + spin_lock_init(&priv->tx_lock); /* Get IRQ for the device */ ndev->irq = platform_get_irq(pdev, 0); @@ -1172,9 +1364,9 @@ static int xcan_probe(struct platform_device *pdev) pm_runtime_put(&pdev->dev); - netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n", + netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n", priv->reg_base, ndev->irq, priv->can.clock.freq, - priv->tx_max); + tx_fifo_depth, priv->tx_max); return 0; @@ -1208,14 +1400,6 @@ static int xcan_remove(struct platform_device *pdev) return 0; } -/* Match table for OF platform binding */ -static const struct of_device_id xcan_of_match[] = { - { .compatible = "xlnx,zynq-can-1.0", }, - { .compatible = "xlnx,axi-can-1.00.a", }, - { /* end of list */ }, -}; -MODULE_DEVICE_TABLE(of, xcan_of_match); - static struct platform_driver xcan_driver = { .probe = xcan_probe, .remove = xcan_remove, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9dd270978064..a80767d3c405 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -343,6 +343,7 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = { .xlate = irq_domain_xlate_twocell, }; +/* To be called with reg_lock held */ static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip) { int irq, virq; @@ -362,9 +363,15 @@ static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip) static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip) { - mv88e6xxx_g1_irq_free_common(chip); - + /* + * free_irq must be called without reg_lock taken because the irq + * handler takes this lock, too. + */ free_irq(chip->irq, chip); + + mutex_lock(&chip->reg_lock); + mv88e6xxx_g1_irq_free_common(chip); + mutex_unlock(&chip->reg_lock); } static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip) @@ -469,10 +476,12 @@ static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip) static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip) { - mv88e6xxx_g1_irq_free_common(chip); - kthread_cancel_delayed_work_sync(&chip->irq_poll_work); kthread_destroy_worker(chip->kworker); + + mutex_lock(&chip->reg_lock); + mv88e6xxx_g1_irq_free_common(chip); + mutex_unlock(&chip->reg_lock); } int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask) @@ -4523,12 +4532,10 @@ out_g2_irq: if (chip->info->g2_irqs > 0) mv88e6xxx_g2_irq_free(chip); out_g1_irq: - mutex_lock(&chip->reg_lock); if (chip->irq > 0) mv88e6xxx_g1_irq_free(chip); else mv88e6xxx_irq_poll_free(chip); - mutex_unlock(&chip->reg_lock); out: if (pdata) dev_put(pdata->netdev); @@ -4556,12 +4563,10 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) if (chip->info->g2_irqs > 0) mv88e6xxx_g2_irq_free(chip); - mutex_lock(&chip->reg_lock); if (chip->irq > 0) mv88e6xxx_g1_irq_free(chip); else mv88e6xxx_irq_poll_free(chip); - mutex_unlock(&chip->reg_lock); } static const struct of_device_id mv88e6xxx_of_match[] = { diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig index 5b7658bcf020..5c3ef9fc8207 100644 --- a/drivers/net/ethernet/3com/Kconfig +++ b/drivers/net/ethernet/3com/Kconfig @@ -32,7 +32,7 @@ config EL3 config 3C515 tristate "3c515 ISA \"Fast EtherLink\"" - depends on ISA && ISA_DMA_API + depends on ISA && ISA_DMA_API && !PPC32 ---help--- If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet network card, say Y here. diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index f273af136fc7..9e5cf5583c87 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -44,7 +44,7 @@ config AMD8111_ETH config LANCE tristate "AMD LANCE and PCnet (AT1500 and NE2100) support" - depends on ISA && ISA_DMA_API && !ARM + depends on ISA && ISA_DMA_API && !ARM && !PPC32 ---help--- If you have a network (Ethernet) card of this type, say Y here. Some LinkSys cards are of this type. @@ -138,7 +138,7 @@ config PCMCIA_NMCLAN config NI65 tristate "NI6510 support" - depends on ISA && ISA_DMA_API && !ARM + depends on ISA && ISA_DMA_API && !ARM && !PPC32 ---help--- If you have a network (Ethernet) card of this type, say Y here. diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 94270f654b3b..7087b88550db 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -1686,6 +1686,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter) skb = build_skb(page_address(page) + adapter->rx_page_offset, adapter->rx_frag_size); if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); adapter->rx_page_offset += adapter->rx_frag_size; if (adapter->rx_page_offset >= PAGE_SIZE) adapter->rx_page = NULL; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index da18aa239acb..a4a90b6cdb46 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3388,14 +3388,18 @@ static int bnx2x_set_rss_flags(struct bnx2x *bp, struct ethtool_rxnfc *info) DP(BNX2X_MSG_ETHTOOL, "rss re-configured, UDP 4-tupple %s\n", udp_rss_requested ? "enabled" : "disabled"); - return bnx2x_rss(bp, &bp->rss_conf_obj, false, true); + if (bp->state == BNX2X_STATE_OPEN) + return bnx2x_rss(bp, &bp->rss_conf_obj, false, + true); } else if ((info->flow_type == UDP_V6_FLOW) && (bp->rss_conf_obj.udp_rss_v6 != udp_rss_requested)) { bp->rss_conf_obj.udp_rss_v6 = udp_rss_requested; DP(BNX2X_MSG_ETHTOOL, "rss re-configured, UDP 4-tupple %s\n", udp_rss_requested ? "enabled" : "disabled"); - return bnx2x_rss(bp, &bp->rss_conf_obj, false, true); + if (bp->state == BNX2X_STATE_OPEN) + return bnx2x_rss(bp, &bp->rss_conf_obj, false, + true); } return 0; @@ -3509,7 +3513,10 @@ static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir, bp->rss_conf_obj.ind_table[i] = indir[i] + bp->fp->cl_id; } - return bnx2x_config_rss_eth(bp, false); + if (bp->state == BNX2X_STATE_OPEN) + return bnx2x_config_rss_eth(bp, false); + + return 0; } /** diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig index 5ab912937aff..ec0b545197e2 100644 --- a/drivers/net/ethernet/cirrus/Kconfig +++ b/drivers/net/ethernet/cirrus/Kconfig @@ -19,6 +19,7 @@ if NET_VENDOR_CIRRUS config CS89x0 tristate "CS89x0 support" depends on ISA || EISA || ARM + depends on !PPC32 ---help--- Support for CS89x0 chipset based Ethernet cards. If you have a network (Ethernet) card of this type, say Y and read the file diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 9128858479c4..2353ec829c04 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -229,6 +229,7 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) txq->txq_stats.tx_busy++; u64_stats_update_end(&txq->txq_stats.syncp); err = NETDEV_TX_BUSY; + wqe_size = 0; goto flush_skbs; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 7b1b5ac986d0..31bd56727022 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2958,7 +2958,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, u32 srqn = qp_get_srqn(qpc) & 0xffffff; int use_srq = (qp_get_srqn(qpc) >> 24) & 1; struct res_srq *srq; - int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff; + int local_qpn = vhcr->in_modifier & 0xffffff; err = adjust_qp_sched_queue(dev, slave, qpc, inbox); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 323ffe8bf7e4..456f30007ad6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -123,7 +123,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, int i; buf->size = size; - buf->npages = 1 << get_order(size); + buf->npages = DIV_ROUND_UP(size, PAGE_SIZE); buf->page_shift = PAGE_SHIFT; buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 75e4308ba786..d258bb679271 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -381,14 +381,14 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) HLIST_HEAD(del_list); spin_lock_bh(&priv->fs.arfs.arfs_lock); mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) { - if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) - break; if (!work_pending(&arfs_rule->arfs_work) && rps_may_expire_flow(priv->netdev, arfs_rule->rxq, arfs_rule->flow_id, arfs_rule->filter_id)) { hlist_del_init(&arfs_rule->hlist); hlist_add_head(&arfs_rule->hlist, &del_list); + if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) + break; } } spin_unlock_bh(&priv->fs.arfs.arfs_lock); @@ -711,6 +711,9 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, skb->protocol != htons(ETH_P_IPV6)) return -EPROTONOSUPPORT; + if (skb->encapsulation) + return -EPROTONOSUPPORT; + arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol); if (!arfs_t) return -EPROTONOSUPPORT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 0a52f31fef37..86bc9ac99586 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -275,7 +275,8 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) } static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, - struct ieee_ets *ets) + struct ieee_ets *ets, + bool zero_sum_allowed) { bool have_ets_tc = false; int bw_sum = 0; @@ -300,8 +301,9 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, } if (have_ets_tc && bw_sum != 100) { - netdev_err(netdev, - "Failed to validate ETS: BW sum is illegal\n"); + if (bw_sum || (!bw_sum && !zero_sum_allowed)) + netdev_err(netdev, + "Failed to validate ETS: BW sum is illegal\n"); return -EINVAL; } return 0; @@ -316,7 +318,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, ets)) return -EOPNOTSUPP; - err = mlx5e_dbcnl_validate_ets(netdev, ets); + err = mlx5e_dbcnl_validate_ets(netdev, ets, false); if (err) return err; @@ -642,12 +644,9 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev) ets.prio_tc[i]); } - err = mlx5e_dbcnl_validate_ets(netdev, &ets); - if (err) { - netdev_err(netdev, - "%s, Failed to validate ETS: %d\n", __func__, err); + err = mlx5e_dbcnl_validate_ets(netdev, &ets, true); + if (err) goto out; - } err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e9888d6c1f7c..04fe20716bd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2004,6 +2004,10 @@ static bool actions_match_supported(struct mlx5e_priv *priv, else actions = flow->nic_attr->action; + if (flow->flags & MLX5E_TC_FLOW_EGRESS && + !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP)) + return false; + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return modify_header_match_supported(&parse_attr->spec, exts); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b79d74860a30..dd01ad4c0b54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2216,6 +2216,6 @@ free_out: u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) { - return esw->mode; + return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f880ffc9acd6..a21df24b695e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1889,7 +1889,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!fwd_next_prio_supported(ft)) return ERR_PTR(-EOPNOTSUPP); - if (dest) + if (dest_num) return ERR_PTR(-EINVAL); mutex_lock(&root->chain_lock); next_ft = find_next_chained_ft(prio); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 1e062e6b2587..3f767cde4c1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -488,6 +488,7 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, void mlx5_init_clock(struct mlx5_core_dev *mdev) { struct mlx5_clock *clock = &mdev->clock; + u64 overflow_cycles; u64 ns; u64 frac = 0; u32 dev_freq; @@ -511,10 +512,17 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) /* Calculate period in seconds to call the overflow watchdog - to make * sure counter is checked at least once every wrap around. + * The period is calculated as the minimum between max HW cycles count + * (The clock source mask) and max amount of cycles that can be + * multiplied by clock multiplier where the result doesn't exceed + * 64bits. */ - ns = cyclecounter_cyc2ns(&clock->cycles, clock->cycles.mask, + overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); + overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1); + + ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, frac, &frac); - do_div(ns, NSEC_PER_SEC / 2 / HZ); + do_div(ns, NSEC_PER_SEC / HZ); clock->overflow_period = ns; mdev->clock_info_page = alloc_page(GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index b97bb72b4db4..86478a6b99c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -113,35 +113,45 @@ err_db_free: return err; } -static void mlx5e_qp_set_frag_buf(struct mlx5_frag_buf *buf, - struct mlx5_wq_qp *qp) +static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf, + struct mlx5_wq_qp *qp) { + struct mlx5_frag_buf_ctrl *sq_fbc; struct mlx5_frag_buf *rqb, *sqb; - rqb = &qp->rq.fbc.frag_buf; + rqb = &qp->rq.fbc.frag_buf; *rqb = *buf; rqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq); - rqb->npages = 1 << get_order(rqb->size); + rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE); - sqb = &qp->sq.fbc.frag_buf; - *sqb = *buf; - sqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq); - sqb->npages = 1 << get_order(sqb->size); + sq_fbc = &qp->sq.fbc; + sqb = &sq_fbc->frag_buf; + *sqb = *buf; + sqb->size = mlx5_wq_cyc_get_byte_size(&qp->sq); + sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE); sqb->frags += rqb->npages; /* first part is for the rq */ + if (sq_fbc->strides_offset) + sqb->frags--; } int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u32 sq_strides_offset; int err; mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4, MLX5_GET(qpc, qpc, log_rq_size), &wq->rq.fbc); - mlx5_fill_fbc(ilog2(MLX5_SEND_WQE_BB), - MLX5_GET(qpc, qpc, log_sq_size), - &wq->sq.fbc); + + sq_strides_offset = + ((wq->rq.fbc.frag_sz_m1 + 1) % PAGE_SIZE) / MLX5_SEND_WQE_BB; + + mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB), + MLX5_GET(qpc, qpc, log_sq_size), + sq_strides_offset, + &wq->sq.fbc); err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { @@ -156,7 +166,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, goto err_db_free; } - mlx5e_qp_set_frag_buf(&wq_ctrl->buf, wq); + mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq); wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 78afe75129ab..382bb93cb090 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -317,7 +317,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, payload.dst_ipv4 = flow->daddr; /* If entry has expired send dst IP with all other fields 0. */ - if (!(neigh->nud_state & NUD_VALID)) { + if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { nfp_tun_del_route_from_cache(app, payload.dst_ipv4); /* Trigger ARP to verify invalid neighbour state. */ neigh_event_send(neigh, NULL); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 99973e10b179..5ede6408649d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -665,7 +665,7 @@ qed_sp_update_mcast_bin(struct qed_hwfn *p_hwfn, p_ramrod->common.update_approx_mcast_flg = 1; for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { - u32 *p_bins = (u32 *)p_params->bins; + u32 *p_bins = p_params->bins; p_ramrod->approx_mcast.bins[i] = cpu_to_le32(p_bins[i]); } @@ -1476,8 +1476,8 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, enum spq_mode comp_mode, struct qed_spq_comp_cb *p_comp_data) { - unsigned long bins[ETH_MULTICAST_MAC_BINS_IN_REGS]; struct vport_update_ramrod_data *p_ramrod = NULL; + u32 bins[ETH_MULTICAST_MAC_BINS_IN_REGS]; struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; u8 abs_vport_id = 0; @@ -1513,26 +1513,25 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, /* explicitly clear out the entire vector */ memset(&p_ramrod->approx_mcast.bins, 0, sizeof(p_ramrod->approx_mcast.bins)); - memset(bins, 0, sizeof(unsigned long) * - ETH_MULTICAST_MAC_BINS_IN_REGS); + memset(bins, 0, sizeof(bins)); /* filter ADD op is explicit set op and it removes * any existing filters for the vport */ if (p_filter_cmd->opcode == QED_FILTER_ADD) { for (i = 0; i < p_filter_cmd->num_mc_addrs; i++) { - u32 bit; + u32 bit, nbits; bit = qed_mcast_bin_from_mac(p_filter_cmd->mac[i]); - __set_bit(bit, bins); + nbits = sizeof(u32) * BITS_PER_BYTE; + bins[bit / nbits] |= 1 << (bit % nbits); } /* Convert to correct endianity */ for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { struct vport_update_ramrod_mcast *p_ramrod_bins; - u32 *p_bins = (u32 *)bins; p_ramrod_bins = &p_ramrod->approx_mcast; - p_ramrod_bins->bins[i] = cpu_to_le32(p_bins[i]); + p_ramrod_bins->bins[i] = cpu_to_le32(bins[i]); } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index 806a8da257e9..8d80f1095d17 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -215,7 +215,7 @@ struct qed_sp_vport_update_params { u8 anti_spoofing_en; u8 update_accept_any_vlan_flg; u8 accept_any_vlan; - unsigned long bins[8]; + u32 bins[8]; struct qed_rss_params *rss_params; struct qed_filter_accept_flags accept_flags; struct qed_sge_tpa_params *sge_tpa_params; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 15f1b3294289..96a67efac308 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1211,6 +1211,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, break; default: p_link->speed = 0; + p_link->link_up = 0; } if (p_link->link_up && p_link->speed) @@ -1308,9 +1309,15 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up) phy_cfg.pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0; phy_cfg.adv_speed = params->speed.advertised_speeds; phy_cfg.loopback_mode = params->loopback_mode; - if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) { - if (params->eee.enable) - phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED; + + /* There are MFWs that share this capability regardless of whether + * this is feasible or not. And given that at the very least adv_caps + * would be set internally by qed, we want to make sure LFA would + * still work. + */ + if ((p_hwfn->mcp_info->capabilities & + FW_MB_PARAM_FEATURE_SUPPORT_EEE) && params->eee.enable) { + phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED; if (params->eee.tx_lpi_enable) phy_cfg.eee_cfg |= EEE_CFG_TX_LPI; if (params->eee.adv_caps & QED_EEE_1G_ADV) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index fd59cf45f4be..26e918d7f2f9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -2831,7 +2831,7 @@ qed_iov_vp_update_mcast_bin_param(struct qed_hwfn *p_hwfn, p_data->update_approx_mcast_flg = 1; memcpy(p_data->bins, p_mcast_tlv->bins, - sizeof(unsigned long) * ETH_MULTICAST_MAC_BINS_IN_REGS); + sizeof(u32) * ETH_MULTICAST_MAC_BINS_IN_REGS); *tlvs_mask |= 1 << QED_IOV_VP_UPDATE_MCAST; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 2d7fcd6a0777..be6ddde1a104 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1126,7 +1126,7 @@ int qed_vf_pf_vport_update(struct qed_hwfn *p_hwfn, resp_size += sizeof(struct pfvf_def_resp_tlv); memcpy(p_mcast_tlv->bins, p_params->bins, - sizeof(unsigned long) * ETH_MULTICAST_MAC_BINS_IN_REGS); + sizeof(u32) * ETH_MULTICAST_MAC_BINS_IN_REGS); } update_rx = p_params->accept_flags.update_rx_mode_config; @@ -1272,7 +1272,7 @@ void qed_vf_pf_filter_mcast(struct qed_hwfn *p_hwfn, u32 bit; bit = qed_mcast_bin_from_mac(p_filter_cmd->mac[i]); - __set_bit(bit, sp_params.bins); + sp_params.bins[bit / 32] |= 1 << (bit % 32); } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index 4f05d5eb3cf5..033409db86ae 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -392,7 +392,12 @@ struct vfpf_vport_update_mcast_bin_tlv { struct channel_tlv tl; u8 padding[4]; - u64 bins[8]; + /* There are only 256 approx bins, and in HSI they're divided into + * 32-bit values. As old VFs used to set-bit to the values on its side, + * the upper half of the array is never expected to contain any data. + */ + u64 bins[4]; + u64 obsolete_bins[4]; }; struct vfpf_vport_update_accept_param_tlv { diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 49a6e25ddc2b..8ea1fa36ca43 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7396,8 +7396,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return rc; } - /* override BIOS settings, use userspace tools to enable WOL */ - __rtl8169_set_wol(tp, 0); + tp->saved_wolopts = __rtl8169_get_wol(tp); mutex_init(&tp->wk.mutex); u64_stats_init(&tp->rx_stats.syncp); diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 16c3bfbe1992..757a3b37ae8a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -218,6 +218,7 @@ issue: ret = of_mdiobus_register(bus, np1); if (ret) { mdiobus_free(bus); + lp->mii_bus = NULL; return ret; } return 0; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d2baedc4ea91..914fe8e6ac86 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -519,7 +519,7 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync) * negotiation may already be done and aneg interrupt may not be * generated. */ - if (phy_interrupt_is_valid(phydev) && (phydev->state == PHY_AN)) { + if (phydev->irq != PHY_POLL && phydev->state == PHY_AN) { err = phy_aneg_done(phydev); if (err > 0) { trigger = true; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 38502809420b..cb0cc30c3d6a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1246,7 +1246,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ - {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 46df030b2c3f..bf65501e6ed6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -100,6 +100,22 @@ static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); static void nvme_put_subsystem(struct nvme_subsystem *subsys); +static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, + unsigned nsid); + +static void nvme_set_queue_dying(struct nvme_ns *ns) +{ + /* + * Revalidating a dead namespace sets capacity to 0. This will end + * buffered writers dirtying pages that can't be synced. + */ + if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) + return; + revalidate_disk(ns->disk); + blk_set_queue_dying(ns->queue); + /* Forcibly unquiesce queues to avoid blocking dispatch */ + blk_mq_unquiesce_queue(ns->queue); +} static void nvme_queue_scan(struct nvme_ctrl *ctrl) { @@ -1044,14 +1060,17 @@ EXPORT_SYMBOL_GPL(nvme_set_queue_count); static void nvme_enable_aen(struct nvme_ctrl *ctrl) { - u32 result; + u32 result, supported_aens = ctrl->oaes & NVME_AEN_SUPPORTED; int status; - status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, - ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result); + if (!supported_aens) + return; + + status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, supported_aens, + NULL, 0, &result); if (status) dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n", - ctrl->oaes & NVME_AEN_SUPPORTED); + supported_aens); } static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) @@ -1151,19 +1170,15 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, static void nvme_update_formats(struct nvme_ctrl *ctrl) { - struct nvme_ns *ns, *next; - LIST_HEAD(rm_list); + struct nvme_ns *ns; - down_write(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) { - if (ns->disk && nvme_revalidate_disk(ns->disk)) { - list_move_tail(&ns->list, &rm_list); - } - } - up_write(&ctrl->namespaces_rwsem); + down_read(&ctrl->namespaces_rwsem); + list_for_each_entry(ns, &ctrl->namespaces, list) + if (ns->disk && nvme_revalidate_disk(ns->disk)) + nvme_set_queue_dying(ns); + up_read(&ctrl->namespaces_rwsem); - list_for_each_entry_safe(ns, next, &rm_list, list) - nvme_ns_remove(ns); + nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL); } static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) @@ -1218,7 +1233,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, effects = nvme_passthru_start(ctrl, ns, cmd.opcode); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, - (void __user *)(uintptr_t)cmd.metadata, cmd.metadata, + (void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len, 0, &cmd.result, timeout); nvme_passthru_end(ctrl, effects); @@ -3138,7 +3153,7 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, down_write(&ctrl->namespaces_rwsem); list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { - if (ns->head->ns_id > nsid) + if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags)) list_move_tail(&ns->list, &rm_list); } up_write(&ctrl->namespaces_rwsem); @@ -3542,19 +3557,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) if (ctrl->admin_q) blk_mq_unquiesce_queue(ctrl->admin_q); - list_for_each_entry(ns, &ctrl->namespaces, list) { - /* - * Revalidating a dead namespace sets capacity to 0. This will - * end buffered writers dirtying pages that can't be synced. - */ - if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) - continue; - revalidate_disk(ns->disk); - blk_set_queue_dying(ns->queue); + list_for_each_entry(ns, &ctrl->namespaces, list) + nvme_set_queue_dying(ns); - /* Forcibly unquiesce queues to avoid blocking dispatch */ - blk_mq_unquiesce_queue(ns->queue); - } up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_kill_queues); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ba943f211687..ddd441b1516a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2556,11 +2556,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) quirks |= check_vendor_combination_bug(pdev); - result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, - quirks); - if (result) - goto release_pools; - /* * Double check that our mempool alloc size will cover the biggest * command we support. @@ -2578,6 +2573,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } + result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, + quirks); + if (result) + goto release_mempool; + dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); nvme_get_ctrl(&dev->ctrl); @@ -2585,6 +2585,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; + release_mempool: + mempool_destroy(dev->iod_mempool); release_pools: nvme_release_prp_pools(dev); unmap: diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c index 0a95b5f25380..497a68389461 100644 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ b/drivers/scsi/cxlflash/ocxl_hw.c @@ -134,15 +134,14 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name, rc = PTR_ERR(file); dev_err(dev, "%s: alloc_file failed rc=%d\n", __func__, rc); - goto err5; + path_put(&path); + goto err3; } file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); file->private_data = priv; out: return file; -err5: - path_put(&path); err4: iput(inode); err3: diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index 32f0748fd067..0097a939487f 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -27,9 +27,16 @@ #define GPC_PGC_SW2ISO_SHIFT 0x8 #define GPC_PGC_SW_SHIFT 0x0 +#define GPC_PGC_PCI_PDN 0x200 +#define GPC_PGC_PCI_SR 0x20c + #define GPC_PGC_GPU_PDN 0x260 #define GPC_PGC_GPU_PUPSCR 0x264 #define GPC_PGC_GPU_PDNSCR 0x268 +#define GPC_PGC_GPU_SR 0x26c + +#define GPC_PGC_DISP_PDN 0x240 +#define GPC_PGC_DISP_SR 0x24c #define GPU_VPU_PUP_REQ BIT(1) #define GPU_VPU_PDN_REQ BIT(0) @@ -318,10 +325,24 @@ static const struct of_device_id imx_gpc_dt_ids[] = { { } }; +static const struct regmap_range yes_ranges[] = { + regmap_reg_range(GPC_CNTR, GPC_CNTR), + regmap_reg_range(GPC_PGC_PCI_PDN, GPC_PGC_PCI_SR), + regmap_reg_range(GPC_PGC_GPU_PDN, GPC_PGC_GPU_SR), + regmap_reg_range(GPC_PGC_DISP_PDN, GPC_PGC_DISP_SR), +}; + +static const struct regmap_access_table access_table = { + .yes_ranges = yes_ranges, + .n_yes_ranges = ARRAY_SIZE(yes_ranges), +}; + static const struct regmap_config imx_gpc_regmap_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, + .rd_table = &access_table, + .wr_table = &access_table, .max_register = 0x2ac, }; diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c index a3a83424a926..16478fe9e3f8 100644 --- a/drivers/staging/media/omap4iss/iss_video.c +++ b/drivers/staging/media/omap4iss/iss_video.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include <asm/cacheflush.h> #include <linux/clk.h> #include <linux/mm.h> #include <linux/pagemap.h> @@ -24,6 +23,8 @@ #include <media/v4l2-ioctl.h> #include <media/v4l2-mc.h> +#include <asm/cacheflush.h> + #include "iss_video.h" #include "iss.h" diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 759a5bdd40e1..7cd63b0c1a46 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -457,17 +457,17 @@ static void tce_iommu_unuse_page(struct tce_container *container, } static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container, - unsigned long tce, unsigned long size, + unsigned long tce, unsigned long shift, unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem) { long ret = 0; struct mm_iommu_table_group_mem_t *mem; - mem = mm_iommu_lookup(container->mm, tce, size); + mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift); if (!mem) return -EINVAL; - ret = mm_iommu_ua_to_hpa(mem, tce, phpa); + ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa); if (ret) return -EINVAL; @@ -487,7 +487,7 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container, if (!pua) return; - ret = tce_iommu_prereg_ua_to_hpa(container, *pua, IOMMU_PAGE_SIZE(tbl), + ret = tce_iommu_prereg_ua_to_hpa(container, *pua, tbl->it_page_shift, &hpa, &mem); if (ret) pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", @@ -611,7 +611,7 @@ static long tce_iommu_build_v2(struct tce_container *container, entry + i); ret = tce_iommu_prereg_ua_to_hpa(container, - tce, IOMMU_PAGE_SIZE(tbl), &hpa, &mem); + tce, tbl->it_page_shift, &hpa, &mem); if (ret) break; @@ -1896,6 +1896,11 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, return ret; } +struct __aio_sigset { + const sigset_t __user *sigmask; + size_t sigsetsize; +}; + SYSCALL_DEFINE6(io_pgetevents, aio_context_t, ctx_id, long, min_nr, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e55843f536bc..b3e45714d28f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4238,8 +4238,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) struct extent_map *em; u64 start = page_offset(page); u64 end = start + PAGE_SIZE - 1; - struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree; - struct extent_map_tree *map = &BTRFS_I(page->mapping->host)->extent_tree; + struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host); + struct extent_io_tree *tree = &btrfs_inode->io_tree; + struct extent_map_tree *map = &btrfs_inode->extent_tree; if (gfpflags_allow_blocking(mask) && page->mapping->host->i_size > SZ_16M) { @@ -4262,6 +4263,8 @@ int try_release_extent_mapping(struct page *page, gfp_t mask) extent_map_end(em) - 1, EXTENT_LOCKED | EXTENT_WRITEBACK, 0, NULL)) { + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &btrfs_inode->runtime_flags); remove_extent_mapping(map, em); /* once for the rb tree */ free_extent_map(em); diff --git a/fs/exec.c b/fs/exec.c index 2d4e0075bd24..72e961a62adb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -290,7 +290,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) struct vm_area_struct *vma = NULL; struct mm_struct *mm = bprm->mm; - bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + bprm->vma = vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; @@ -298,7 +298,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) err = -EINTR; goto err_free; } - vma->vm_mm = mm; /* * Place the stack at the largest stack address the architecture @@ -311,7 +310,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - INIT_LIST_HEAD(&vma->anon_vma_chain); err = insert_vm_struct(mm, vma); if (err) @@ -326,7 +324,7 @@ err: up_write(&mm->mmap_sem); err_free: bprm->vma = NULL; - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return err; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 065dc919a0ce..bfd589ea74c0 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -707,13 +707,21 @@ static void fat_set_state(struct super_block *sb, brelse(bh); } +static void fat_reset_iocharset(struct fat_mount_options *opts) +{ + if (opts->iocharset != fat_default_iocharset) { + /* Note: opts->iocharset can be NULL here */ + kfree(opts->iocharset); + opts->iocharset = fat_default_iocharset; + } +} + static void delayed_free(struct rcu_head *p) { struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); unload_nls(sbi->nls_disk); unload_nls(sbi->nls_io); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); kfree(sbi); } @@ -1132,7 +1140,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, opts->fs_fmask = opts->fs_dmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; - opts->iocharset = fat_default_iocharset; + fat_reset_iocharset(opts); if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; opts->rodir = 0; @@ -1289,8 +1297,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, /* vfat specific */ case Opt_charset: - if (opts->iocharset != fat_default_iocharset) - kfree(opts->iocharset); + fat_reset_iocharset(opts); iocharset = match_strdup(&args[0]); if (!iocharset) return -ENOMEM; @@ -1881,8 +1888,7 @@ out_fail: iput(fat_inode); unload_nls(sbi->nls_io); unload_nls(sbi->nls_disk); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); sb->s_fs_info = NULL; kfree(sbi); return error; diff --git a/fs/internal.h b/fs/internal.h index 980d005b21b4..5645b4ebf494 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -127,7 +127,6 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, extern int open_check_o_direct(struct file *f); extern int vfs_open(const struct path *, struct file *, const struct cred *); -extern struct file *filp_clone_open(struct file *); /* * inode.c diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h index 687b1760bb9f..f02cee0225d4 100644 --- a/include/linux/bpfilter.h +++ b/include/linux/bpfilter.h @@ -5,10 +5,10 @@ #include <uapi/linux/bpfilter.h> struct sock; -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval, +int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen); -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval, - int *optlen); +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen); extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname, char __user *optval, unsigned int optlen, bool is_set); diff --git a/include/linux/fs.h b/include/linux/fs.h index d78d146a98da..805bf22898cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2420,6 +2420,7 @@ extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int, umode_t); extern struct file * dentry_open(const struct path *, int, const struct cred *); +extern struct file *filp_clone_open(struct file *); extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int, int *); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 86cb0ebf92fa..fd0aaa5568fe 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -363,6 +363,7 @@ struct mlx5_frag_buf_ctrl { struct mlx5_frag_buf frag_buf; u32 sz_m1; u32 frag_sz_m1; + u32 strides_offset; u8 log_sz; u8 log_stride; u8 log_frag_strides; @@ -991,14 +992,22 @@ static inline u32 mlx5_base_mkey(const u32 key) return key & 0xffffff00u; } -static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz, - struct mlx5_frag_buf_ctrl *fbc) +static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, + u32 strides_offset, + struct mlx5_frag_buf_ctrl *fbc) { fbc->log_stride = log_stride; fbc->log_sz = log_sz; fbc->sz_m1 = (1 << fbc->log_sz) - 1; fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride; fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1; + fbc->strides_offset = strides_offset; +} + +static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz, + struct mlx5_frag_buf_ctrl *fbc) +{ + mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc); } static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc, @@ -1012,7 +1021,10 @@ static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc, static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, u32 ix) { - unsigned int frag = (ix >> fbc->log_frag_strides); + unsigned int frag; + + ix += fbc->strides_offset; + frag = ix >> fbc->log_frag_strides; return fbc->frag_buf.frags[frag].buf + ((fbc->frag_sz_m1 & ix) << fbc->log_stride); diff --git a/include/linux/mm.h b/include/linux/mm.h index 3982c83fdcbf..d3a3842316b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -155,7 +155,9 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, * mmap() functions). */ -extern struct kmem_cache *vm_area_cachep; +struct vm_area_struct *vm_area_alloc(struct mm_struct *); +struct vm_area_struct *vm_area_dup(struct vm_area_struct *); +void vm_area_free(struct vm_area_struct *); #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 5be31eb7b266..108ede99e533 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -75,7 +75,7 @@ extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern long kernel_wait4(pid_t, int *, int, struct rusage *); +extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); extern void free_task(struct task_struct *tsk); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a368a68cb667..5c1a0933768e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -11,6 +11,7 @@ #ifndef _LINUX_SYSCALLS_H #define _LINUX_SYSCALLS_H +struct __aio_sigset; struct epoll_event; struct iattr; struct inode; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9ba1f289c439..9a850973e09a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5937,10 +5937,11 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, /** * cfg80211_rx_control_port - notification about a received control port frame * @dev: The device the frame matched to - * @buf: control port frame - * @len: length of the frame data - * @addr: The peer from which the frame was received - * @proto: frame protocol, typically PAE or Pre-authentication + * @skb: The skbuf with the control port frame. It is assumed that the skbuf + * is 802.3 formatted (with 802.3 header). The skb can be non-linear. + * This function does not take ownership of the skb, so the caller is + * responsible for any cleanup. The caller must also ensure that + * skb->protocol is set appropriately. * @unencrypted: Whether the frame was received unencrypted * * This function is used to inform userspace about a received control port @@ -5953,8 +5954,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, * Return: %true if the frame was passed to userspace */ bool cfg80211_rx_control_port(struct net_device *dev, - const u8 *buf, size_t len, - const u8 *addr, u16 proto, bool unencrypted); + struct sk_buff *skb, bool unencrypted); /** * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 71b9043aa0e7..3d4930528db0 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -281,6 +281,11 @@ static inline void fib6_info_hold(struct fib6_info *f6i) atomic_inc(&f6i->fib6_ref); } +static inline bool fib6_info_hold_safe(struct fib6_info *f6i) +{ + return atomic_inc_not_zero(&f6i->fib6_ref); +} + static inline void fib6_info_release(struct fib6_info *f6i) { if (f6i && atomic_dec_and_test(&f6i->fib6_ref)) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 08c005ce56e9..dc417ef0a0c5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -150,6 +150,7 @@ static inline void nft_data_debug(const struct nft_data *data) * @portid: netlink portID of the original message * @seq: netlink sequence number * @family: protocol family + * @level: depth of the chains * @report: notify via unicast netlink message */ struct nft_ctx { @@ -160,6 +161,7 @@ struct nft_ctx { u32 portid; u32 seq; u8 family; + u8 level; bool report; }; @@ -865,7 +867,6 @@ enum nft_chain_flags { * @table: table that this chain belongs to * @handle: chain handle * @use: number of jump references to this chain - * @level: length of longest path to this chain * @flags: bitmask of enum nft_chain_flags * @name: name of the chain */ @@ -878,7 +879,6 @@ struct nft_chain { struct nft_table *table; u64 handle; u32 use; - u16 level; u8 flags:6, genmask:2; char *name; @@ -1124,7 +1124,6 @@ struct nft_flowtable { u32 genmask:2, use:30; u64 handle; - char *dev_name[NFT_FLOWTABLE_DEVICE_MAX]; /* runtime data below here */ struct nf_hook_ops *ops ____cacheline_aligned; struct nf_flowtable data; diff --git a/include/net/tcp.h b/include/net/tcp.h index f6e0a9b1dff3..d769dc20359b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -342,6 +342,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks); static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { @@ -540,6 +541,7 @@ void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); void tcp_push_one(struct sock *, unsigned int mss_now); +void __tcp_send_ack(struct sock *sk, u32 rcv_nxt); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); @@ -840,6 +842,11 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb) */ static inline int tcp_v6_iif(const struct sk_buff *skb) { + return TCP_SKB_CB(skb)->header.h6.iif; +} + +static inline int tcp_v6_iif_l3_slave(const struct sk_buff *skb) +{ bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index 3c5038b587ba..d4593a6062ef 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -29,7 +29,6 @@ #include <linux/types.h> #include <linux/fs.h> -#include <linux/signal.h> #include <asm/byteorder.h> typedef __kernel_ulong_t aio_context_t; @@ -110,10 +109,5 @@ struct iocb { #undef IFBIG #undef IFLITTLE -struct __aio_sigset { - const sigset_t __user *sigmask; - size_t sigsetsize; -}; - #endif /* __LINUX__AIO_ABI_H */ diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 0b5ddbe135a4..972265f32871 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -76,7 +76,7 @@ struct btf_type { */ #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) #define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) -#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff) +#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) /* Attributes stored in the BTF_INT_ENCODING */ #define BTF_INT_SIGNED (1 << 0) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index e016ac3afa24..9704934252b3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -450,7 +450,7 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) */ static bool btf_type_int_is_regular(const struct btf_type *t) { - u16 nr_bits, nr_bytes; + u8 nr_bits, nr_bytes; u32 int_data; int_data = btf_type_int(t); @@ -993,12 +993,16 @@ static void btf_int_bits_seq_show(const struct btf *btf, { u16 left_shift_bits, right_shift_bits; u32 int_data = btf_type_int(t); - u16 nr_bits = BTF_INT_BITS(int_data); - u16 total_bits_offset; - u16 nr_copy_bytes; - u16 nr_copy_bits; + u8 nr_bits = BTF_INT_BITS(int_data); + u8 total_bits_offset; + u8 nr_copy_bytes; + u8 nr_copy_bits; u64 print_num; + /* + * bits_offset is at most 7. + * BTF_INT_OFFSET() cannot exceed 64 bits. + */ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); data += BITS_ROUNDDOWN_BYTES(total_bits_offset); bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); @@ -1028,7 +1032,7 @@ static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, u32 int_data = btf_type_int(t); u8 encoding = BTF_INT_ENCODING(int_data); bool sign = encoding & BTF_INT_SIGNED; - u32 nr_bits = BTF_INT_BITS(int_data); + u8 nr_bits = BTF_INT_BITS(int_data); if (bits_offset || BTF_INT_OFFSET(int_data) || BITS_PER_BYTE_MASKED(nr_bits)) { diff --git a/kernel/fork.c b/kernel/fork.c index 9440d61b925c..a191c05e757d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -303,11 +303,38 @@ struct kmem_cache *files_cachep; struct kmem_cache *fs_cachep; /* SLAB cache for vm_area_struct structures */ -struct kmem_cache *vm_area_cachep; +static struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) +{ + struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + + if (vma) { + vma->vm_mm = mm; + INIT_LIST_HEAD(&vma->anon_vma_chain); + } + return vma; +} + +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) +{ + struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + + if (new) { + *new = *orig; + INIT_LIST_HEAD(&new->anon_vma_chain); + } + return new; +} + +void vm_area_free(struct vm_area_struct *vma) +{ + kmem_cache_free(vm_area_cachep, vma); +} + static void account_kernel_stack(struct task_struct *tsk, int account) { void *stack = task_stack_page(tsk); @@ -455,11 +482,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto fail_nomem; charge = len; } - tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + tmp = vm_area_dup(mpnt); if (!tmp) goto fail_nomem; - *tmp = *mpnt; - INIT_LIST_HEAD(&tmp->anon_vma_chain); retval = vma_dup_policy(mpnt, tmp); if (retval) goto fail_nomem_policy; @@ -539,7 +564,7 @@ fail_uprobe_end: fail_nomem_anon_vma_fork: mpol_put(vma_policy(tmp)); fail_nomem_policy: - kmem_cache_free(vm_area_cachep, tmp); + vm_area_free(tmp); fail_nomem: retval = -ENOMEM; vm_unacct_memory(charge); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fbfc3f1d368a..10c7b51c0d1f 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2290,8 +2290,17 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p) if (task_on_rq_queued(p) && p->dl.dl_runtime) task_non_contending(p); - if (!task_on_rq_queued(p)) + if (!task_on_rq_queued(p)) { + /* + * Inactive timer is armed. However, p is leaving DEADLINE and + * might migrate away from this rq while continuing to run on + * some other class. We need to remove its contribution from + * this rq running_bw now, or sub_rq_bw (below) will complain. + */ + if (p->dl.dl_non_contending) + sub_running_bw(&p->dl, &rq->dl); sub_rq_bw(&p->dl, &rq->dl); + } /* * We cannot use inactive_task_timer() to invoke sub_running_bw() diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index f89014a2c238..1ff523dae6e2 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -270,7 +270,11 @@ unlock: goto retry; } - wake_up_q(&wakeq); + if (!err) { + preempt_disable(); + wake_up_q(&wakeq); + preempt_enable(); + } return err; } diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7e43cd54c84c..8be175df3075 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -596,15 +596,70 @@ static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, return ret; } +static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, + struct iov_iter *i) +{ + struct pipe_inode_info *pipe = i->pipe; + size_t n, off, xfer = 0; + int idx; + + if (!sanity(i)) + return 0; + + bytes = n = push_pipe(i, bytes, &idx, &off); + if (unlikely(!n)) + return 0; + for ( ; n; idx = next_idx(idx, pipe), off = 0) { + size_t chunk = min_t(size_t, n, PAGE_SIZE - off); + unsigned long rem; + + rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr, + chunk); + i->idx = idx; + i->iov_offset = off + chunk - rem; + xfer += chunk - rem; + if (rem) + break; + n -= chunk; + addr += chunk; + } + i->count -= xfer; + return xfer; +} + +/** + * _copy_to_iter_mcsafe - copy to user with source-read error exception handling + * @addr: source kernel address + * @bytes: total transfer length + * @iter: destination iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_to_iter() for protecting read/write to persistent memory. + * Unless / until an architecture can guarantee identical performance + * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a + * performance regression to switch more users to the mcsafe version. + * + * Otherwise, the main differences between this and typical _copy_to_iter(). + * + * * Typical tail/residue handling after a fault retries the copy + * byte-by-byte until the fault happens again. Re-triggering machine + * checks is potentially fatal so the implementation uses source + * alignment and poison alignment assumptions to avoid re-triggering + * hardware exceptions. + * + * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. + * Compare to copy_to_iter() where only ITER_IOVEC attempts might return + * a short copy. + * + * See MCSAFE_TEST for self-test. + */ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; - if (unlikely(i->type & ITER_PIPE)) { - WARN_ON(1); - return 0; - } + if (unlikely(i->type & ITER_PIPE)) + return copy_pipe_to_iter_mcsafe(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); iterate_and_advance(i, bytes, v, @@ -701,6 +756,20 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) EXPORT_SYMBOL(_copy_from_iter_nocache); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/** + * _copy_from_iter_flushcache - write destination through cpu cache + * @addr: destination kernel address + * @bytes: total transfer length + * @iter: source iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_from_iter() for ensuring that writes to persistent memory + * are flushed through the CPU cache. It is differentiated from + * _copy_from_iter_nocache() in that guarantees all data is flushed for + * all iterator types. The _copy_from_iter_nocache() only attempts to + * bypass the cache for the ITER_IOVEC case, and on some archs may use + * instructions that strand dirty-data in the cache. + */ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1cd7c1a57a14..25346bd99364 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2084,6 +2084,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, if (vma_is_dax(vma)) return; page = pmd_page(_pmd); + if (!PageDirty(page) && pmd_dirty(_pmd)) + set_page_dirty(page); if (!PageReferenced(page) && pmd_young(_pmd)) SetPageReferenced(page); page_remove_rmap(page, true); diff --git a/mm/memblock.c b/mm/memblock.c index 11e46f83e1ad..4b5d245fafc1 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,6 +20,7 @@ #include <linux/kmemleak.h> #include <linux/seq_file.h> #include <linux/memblock.h> +#include <linux/bootmem.h> #include <asm/sections.h> #include <linux/io.h> @@ -1225,6 +1226,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } +#if defined(CONFIG_NO_BOOTMEM) /** * memblock_virt_alloc_internal - allocate boot memory block * @size: size of memory block to be allocated in bytes @@ -1432,6 +1434,7 @@ void * __init memblock_virt_alloc_try_nid( (u64)max_addr); return NULL; } +#endif /** * __memblock_free_early - free boot memory block diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e6f0d5ef320a..8c0280b3143e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -850,7 +850,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) int nid; int i; - while ((memcg = parent_mem_cgroup(memcg))) { + for (; memcg; memcg = parent_mem_cgroup(memcg)) { for_each_node(nid) { mz = mem_cgroup_nodeinfo(memcg, nid); for (i = 0; i <= DEF_PRIORITY; i++) { diff --git a/mm/mmap.c b/mm/mmap.c index 5801b5f0a634..ff1944d8d458 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -182,7 +182,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return next; } @@ -911,7 +911,7 @@ again: anon_vma_merge(vma, next); mm->map_count--; mpol_put(vma_policy(next)); - kmem_cache_free(vm_area_cachep, next); + vm_area_free(next); /* * In mprotect's case 6 (see comments on vma_merge), * we must remove another next too. It would clutter @@ -1729,19 +1729,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * specific mapper. the address has already been validated, but * not unmapped, but the maps are removed from the list. */ - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(mm); if (!vma) { error = -ENOMEM; goto unacct_error; } - vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags; vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; - INIT_LIST_HEAD(&vma->anon_vma_chain); if (file) { if (vm_flags & VM_DENYWRITE) { @@ -1832,7 +1830,7 @@ allow_write_and_free_vma: if (vm_flags & VM_DENYWRITE) allow_write_access(file); free_vma: - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); unacct_error: if (charged) vm_unacct_memory(charged); @@ -2620,15 +2618,10 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, return err; } - new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + new = vm_area_dup(vma); if (!new) return -ENOMEM; - /* most fields are the same, copy all, and then fixup */ - *new = *vma; - - INIT_LIST_HEAD(&new->anon_vma_chain); - if (new_below) new->vm_end = addr; else { @@ -2669,7 +2662,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, out_free_mpol: mpol_put(vma_policy(new)); out_free_vma: - kmem_cache_free(vm_area_cachep, new); + vm_area_free(new); return err; } @@ -2984,14 +2977,12 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla /* * create a vma struct for an anonymous mapping */ - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(mm); if (!vma) { vm_unacct_memory(len >> PAGE_SHIFT); return -ENOMEM; } - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_pgoff = pgoff; @@ -3202,16 +3193,14 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, } *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff); } else { - new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + new_vma = vm_area_dup(vma); if (!new_vma) goto out; - *new_vma = *vma; new_vma->vm_start = addr; new_vma->vm_end = addr + len; new_vma->vm_pgoff = pgoff; if (vma_dup_policy(vma, new_vma)) goto out_free_vma; - INIT_LIST_HEAD(&new_vma->anon_vma_chain); if (anon_vma_clone(new_vma, vma)) goto out_free_mempol; if (new_vma->vm_file) @@ -3226,7 +3215,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, out_free_mempol: mpol_put(vma_policy(new_vma)); out_free_vma: - kmem_cache_free(vm_area_cachep, new_vma); + vm_area_free(new_vma); out: return NULL; } @@ -3350,12 +3339,10 @@ static struct vm_area_struct *__install_special_mapping( int ret; struct vm_area_struct *vma; - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(mm); if (unlikely(vma == NULL)) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; @@ -3376,7 +3363,7 @@ static struct vm_area_struct *__install_special_mapping( return vma; out: - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return ERR_PTR(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index 4452d8bd9ae4..1d22fdbf7d7c 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -769,7 +769,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); put_nommu_region(vma->vm_region); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); } /* @@ -1204,7 +1204,7 @@ unsigned long do_mmap(struct file *file, if (!region) goto error_getting_region; - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(current->mm); if (!vma) goto error_getting_vma; @@ -1212,7 +1212,6 @@ unsigned long do_mmap(struct file *file, region->vm_flags = vm_flags; region->vm_pgoff = pgoff; - INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_flags = vm_flags; vma->vm_pgoff = pgoff; @@ -1368,7 +1367,7 @@ error: kmem_cache_free(vm_region_jar, region); if (vma->vm_file) fput(vma->vm_file); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return ret; sharing_violation: @@ -1469,14 +1468,13 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, if (!region) return -ENOMEM; - new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + new = vm_area_dup(vma); if (!new) { kmem_cache_free(vm_region_jar, region); return -ENOMEM; } /* most fields are the same, copy all, and then fixup */ - *new = *vma; *region = *vma->vm_region; new->vm_region = region; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index e0adcd123f48..711d7156efd8 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -131,8 +131,10 @@ static void caif_flow_cb(struct sk_buff *skb) caifd = caif_get(skb->dev); WARN_ON(caifd == NULL); - if (caifd == NULL) + if (!caifd) { + rcu_read_unlock(); return; + } caifd_hold(caifd); rcu_read_unlock(); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 68bf07206744..43a932cb609b 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -269,7 +269,7 @@ static void __page_pool_empty_ring(struct page_pool *pool) struct page *page; /* Empty recycle ring */ - while ((page = ptr_ring_consume(&pool->ring))) { + while ((page = ptr_ring_consume_bh(&pool->ring))) { /* Verify the refcnt invariant of cached pages */ if (!(page_ref_count(page) == 1)) pr_crit("%s() page_pool refcnt %d violation\n", diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0c1a00672ba9..266b954f763e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3720,6 +3720,7 @@ normal: net_warn_ratelimited( "skb_segment: too many frags: %u %u\n", pos, mss); + err = -EINVAL; goto err; } @@ -3753,11 +3754,10 @@ skip_fraglist: perform_csum_check: if (!csum) { - if (skb_has_shared_frag(nskb)) { - err = __skb_linearize(nskb); - if (err) - goto err; - } + if (skb_has_shared_frag(nskb) && + __skb_linearize(nskb)) + goto err; + if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = diff --git a/net/core/sock.c b/net/core/sock.c index 03fdea5b0f57..9c6ebbdfebf3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2316,9 +2316,9 @@ int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, pfrag->offset += use; sge = sg + sg_curr - 1; - if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page && - sg->offset + sg->length == orig_offset) { - sg->length += use; + if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page && + sge->offset + sge->length == orig_offset) { + sge->length += use; } else { sge = sg + sg_curr; sg_unmark_end(sge); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 598333b123b9..bae9096821be 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1200,8 +1200,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) spin_lock_bh(&im->lock); if (pmc) { im->interface = pmc->interface; - im->sfmode = pmc->sfmode; - if (pmc->sfmode == MCAST_INCLUDE) { + if (im->sfmode == MCAST_INCLUDE) { im->tomb = pmc->tomb; im->sources = pmc->sources; for (psf = im->sources; psf; psf = psf->sf_next) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e2b6bd478afb..9c4e72e9c60a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -524,6 +524,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->dev = from->dev; to->mark = from->mark; + skb_copy_hash(to, from); + /* Copy the flags to each fragment. */ IPCB(to)->flags = IPCB(from)->flags; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 64c76dcf7386..c0fe5ad996f2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -150,15 +150,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; const struct iphdr *iph = ip_hdr(skb); - __be16 *ports = (__be16 *)skb_transport_header(skb); + __be16 *ports; + int end; - if (skb_transport_offset(skb) + 4 > (int)skb->len) + end = skb_transport_offset(skb) + 4; + if (end > 0 && !pskb_may_pull(skb, end)) return; /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ + ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; sin.sin_addr.s_addr = iph->daddr; diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 5869f89ca656..8b637f9f23a2 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -129,24 +129,14 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - /* State has changed from CE=0 to CE=1 and delayed - * ACK has not sent yet. - */ - if (!ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { - u32 tmp_rcv_nxt; - - /* Save current rcv_nxt. */ - tmp_rcv_nxt = tp->rcv_nxt; - - /* Generate previous ack with CE=0. */ - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; - tp->rcv_nxt = ca->prior_rcv_nxt; - - tcp_send_ack(sk); - - /* Recover current rcv_nxt. */ - tp->rcv_nxt = tmp_rcv_nxt; + if (!ca->ce_state) { + /* State has changed from CE=0 to CE=1, force an immediate + * ACK to reflect the new CE state. If an ACK was delayed, + * send that first to reflect the prior CE state. + */ + if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); + tcp_enter_quickack_mode(sk, 1); } ca->prior_rcv_nxt = tp->rcv_nxt; @@ -160,24 +150,14 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - /* State has changed from CE=1 to CE=0 and delayed - * ACK has not sent yet. - */ - if (ca->ce_state && - inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { - u32 tmp_rcv_nxt; - - /* Save current rcv_nxt. */ - tmp_rcv_nxt = tp->rcv_nxt; - - /* Generate previous ack with CE=1. */ - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; - tp->rcv_nxt = ca->prior_rcv_nxt; - - tcp_send_ack(sk); - - /* Recover current rcv_nxt. */ - tp->rcv_nxt = tmp_rcv_nxt; + if (ca->ce_state) { + /* State has changed from CE=1 to CE=0, force an immediate + * ACK to reflect the new CE state. If an ACK was delayed, + * send that first to reflect the prior CE state. + */ + if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) + __tcp_send_ack(sk, ca->prior_rcv_nxt); + tcp_enter_quickack_mode(sk, 1); } ca->prior_rcv_nxt = tp->rcv_nxt; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 91dbb9afb950..d51fa358b2b1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -216,7 +216,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks) icsk->icsk_ack.quick = quickacks; } -static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) +void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -224,6 +224,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) icsk->icsk_ack.pingpong = 0; icsk->icsk_ack.ato = TCP_ATO_MIN; } +EXPORT_SYMBOL(tcp_enter_quickack_mode); /* Send ACKs quickly, if "quick" count is not exhausted * and the session is not interactive. @@ -4366,6 +4367,23 @@ static bool tcp_try_coalesce(struct sock *sk, return true; } +static bool tcp_ooo_try_coalesce(struct sock *sk, + struct sk_buff *to, + struct sk_buff *from, + bool *fragstolen) +{ + bool res = tcp_try_coalesce(sk, to, from, fragstolen); + + /* In case tcp_drop() is called later, update to->gso_segs */ + if (res) { + u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) + + max_t(u16, 1, skb_shinfo(from)->gso_segs); + + skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF); + } + return res; +} + static void tcp_drop(struct sock *sk, struct sk_buff *skb) { sk_drops_add(sk, skb); @@ -4489,8 +4507,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* In the typical case, we are adding an skb to the end of the list. * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. */ - if (tcp_try_coalesce(sk, tp->ooo_last_skb, - skb, &fragstolen)) { + if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb, + skb, &fragstolen)) { coalesce_done: tcp_grow_window(sk, skb); kfree_skb_partial(skb, fragstolen); @@ -4518,7 +4536,7 @@ coalesce_done: /* All the bits are present. Drop. */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb); + tcp_drop(sk, skb); skb = NULL; tcp_dsack_set(sk, seq, end_seq); goto add_sack; @@ -4537,11 +4555,11 @@ coalesce_done: TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - __kfree_skb(skb1); + tcp_drop(sk, skb1); goto merge_right; } - } else if (tcp_try_coalesce(sk, skb1, - skb, &fragstolen)) { + } else if (tcp_ooo_try_coalesce(sk, skb1, + skb, &fragstolen)) { goto coalesce_done; } p = &parent->rb_right; @@ -4924,6 +4942,7 @@ end: static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 range_truesize, sum_tiny = 0; struct sk_buff *skb, *head; u32 start, end; @@ -4935,6 +4954,7 @@ new_range: } start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; + range_truesize = skb->truesize; for (head = skb;;) { skb = skb_rb_next(skb); @@ -4945,11 +4965,20 @@ new_range: if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, NULL, &tp->out_of_order_queue, - head, skb, start, end); + /* Do not attempt collapsing tiny skbs */ + if (range_truesize != head->truesize || + end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) { + tcp_collapse(sk, NULL, &tp->out_of_order_queue, + head, skb, start, end); + } else { + sum_tiny += range_truesize; + if (sum_tiny > sk->sk_rcvbuf >> 3) + return; + } goto new_range; } + range_truesize += skb->truesize; if (unlikely(before(TCP_SKB_CB(skb)->seq, start))) start = TCP_SKB_CB(skb)->seq; if (after(TCP_SKB_CB(skb)->end_seq, end)) @@ -4964,6 +4993,7 @@ new_range: * 2) not add too big latencies if thousands of packets sit there. * (But if application shrinks SO_RCVBUF, we could still end up * freeing whole queue here) + * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks. * * Return true if queue has shrunk. */ @@ -4971,20 +5001,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct rb_node *node, *prev; + int goal; if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) return false; NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); + goal = sk->sk_rcvbuf >> 3; node = &tp->ooo_last_skb->rbnode; do { prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); + goal -= rb_to_skb(node)->truesize; tcp_drop(sk, rb_to_skb(node)); - sk_mem_reclaim(sk); - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && - !tcp_under_memory_pressure(sk)) - break; + if (!prev || goal <= 0) { + sk_mem_reclaim(sk); + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + !tcp_under_memory_pressure(sk)) + break; + goal = sk->sk_rcvbuf >> 3; + } node = prev; } while (node); tp->ooo_last_skb = rb_to_skb(prev); @@ -5019,6 +5055,9 @@ static int tcp_prune_queue(struct sock *sk) else if (tcp_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + return 0; + tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) tcp_collapse(sk, &sk->sk_receive_queue, NULL, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6cbab56e7407..490df62f26d4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -160,7 +160,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp, } /* Account for an ACK we sent. */ -static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) +static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, + u32 rcv_nxt) { struct tcp_sock *tp = tcp_sk(sk); @@ -171,6 +172,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1) __sock_put(sk); } + + if (unlikely(rcv_nxt != tp->rcv_nxt)) + return; /* Special ACK sent by DCTCP to reflect ECN */ tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } @@ -1009,8 +1013,8 @@ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb) * We are working here with either a clone of the original * SKB, or a fresh unique copy made by the retransmit engine. */ -static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, - gfp_t gfp_mask) +static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, + int clone_it, gfp_t gfp_mask, u32 rcv_nxt) { const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet; @@ -1086,7 +1090,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->source = inet->inet_sport; th->dest = inet->inet_dport; th->seq = htonl(tcb->seq); - th->ack_seq = htonl(tp->rcv_nxt); + th->ack_seq = htonl(rcv_nxt); *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->tcp_flags); @@ -1127,7 +1131,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, icsk->icsk_af_ops->send_check(sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) - tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); + tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); @@ -1164,6 +1168,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, return err; } +static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, + gfp_t gfp_mask) +{ + return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask, + tcp_sk(sk)->rcv_nxt); +} + /* This routine just queues the buffer for sending. * * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, @@ -3557,7 +3568,7 @@ void tcp_send_delayed_ack(struct sock *sk) } /* This routine sends an ack and also updates the window. */ -void tcp_send_ack(struct sock *sk) +void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) { struct sk_buff *buff; @@ -3590,9 +3601,14 @@ void tcp_send_ack(struct sock *sk) skb_set_tcp_pure_ack(buff); /* Send it off, this clears delayed acks for us. */ - tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0); + __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt); +} +EXPORT_SYMBOL_GPL(__tcp_send_ack); + +void tcp_send_ack(struct sock *sk) +{ + __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); } -EXPORT_SYMBOL_GPL(tcp_send_ack); /* This routine sends a packet with an out of date sequence * number. It assumes the other end will try to ack it. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1659a6b3cf42..2fac4ad74867 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2372,7 +2372,8 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, continue; if ((rt->fib6_flags & noflags) != 0) continue; - fib6_info_hold(rt); + if (!fib6_info_hold_safe(rt)) + continue; break; } out: diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 201306b9b5ea..5a094f58fe8a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -700,13 +700,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; - __be16 *ports = (__be16 *) skb_transport_header(skb); + __be16 *ports; + int end; - if (skb_transport_offset(skb) + 4 <= (int)skb->len) { + end = skb_transport_offset(skb) + 4; + if (end <= 0 || pskb_may_pull(skb, end)) { /* All current transport protocols have the port numbers in the * first four bytes of the transport header and this function is * written with this assumption in mind. */ + ports = (__be16 *)skb_transport_header(skb); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ipv6_hdr(skb)->daddr; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 24611c8b0562..00d159d431dc 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -402,9 +402,10 @@ static int icmp6_iif(const struct sk_buff *skb) /* for local traffic to local address, skb dev is the loopback * device. Check if there is a dst attached to the skb and if so - * get the real device index. + * get the real device index. Same is needed for replies to a link + * local address on a device enslaved to an L3 master device */ - if (unlikely(iif == LOOPBACK_IFINDEX)) { + if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { const struct rt6_info *rt6 = skb_rt6_info(skb); if (rt6) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8047fd41ba88..16f200f06500 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -570,6 +570,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->dev = from->dev; to->mark = from->mark; + skb_copy_hash(to, from); + #ifdef CONFIG_NET_SCHED to->tc_index = from->tc_index; #endif diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 195ed2db2207..4ae54aaca373 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -790,8 +790,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) spin_lock_bh(&im->mca_lock); if (pmc) { im->idev = pmc->idev; - im->mca_sfmode = pmc->mca_sfmode; - if (pmc->mca_sfmode == MCAST_INCLUDE) { + if (im->mca_sfmode == MCAST_INCLUDE) { im->mca_tomb = pmc->mca_tomb; im->mca_sources = pmc->mca_sources; for (psf = im->mca_sources; psf; psf = psf->sf_next) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2ce0bd17de4f..ec18b3ce8b6d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -972,10 +972,10 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) rt->dst.lastuse = jiffies; } +/* Caller must already hold reference to @from */ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) { rt->rt6i_flags &= ~RTF_EXPIRES; - fib6_info_hold(from); rcu_assign_pointer(rt->from, from); dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); if (from->fib6_metrics != &dst_default_metrics) { @@ -984,6 +984,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) } } +/* Caller must already hold reference to @ort */ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) { struct net_device *dev = fib6_info_nh_dev(ort); @@ -1044,9 +1045,14 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) struct net_device *dev = rt->fib6_nh.nh_dev; struct rt6_info *nrt; + if (!fib6_info_hold_safe(rt)) + return NULL; + nrt = ip6_dst_alloc(dev_net(dev), dev, flags); if (nrt) ip6_rt_copy_init(nrt, rt); + else + fib6_info_release(rt); return nrt; } @@ -1178,10 +1184,15 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort, * Clone the route. */ + if (!fib6_info_hold_safe(ort)) + return NULL; + dev = ip6_rt_get_dev_rcu(ort); rt = ip6_dst_alloc(dev_net(dev), dev, 0); - if (!rt) + if (!rt) { + fib6_info_release(ort); return NULL; + } ip6_rt_copy_init(rt, ort); rt->rt6i_flags |= RTF_CACHE; @@ -1210,12 +1221,17 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt) struct net_device *dev; struct rt6_info *pcpu_rt; + if (!fib6_info_hold_safe(rt)) + return NULL; + rcu_read_lock(); dev = ip6_rt_get_dev_rcu(rt); pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); rcu_read_unlock(); - if (!pcpu_rt) + if (!pcpu_rt) { + fib6_info_release(rt); return NULL; + } ip6_rt_copy_init(pcpu_rt, rt); pcpu_rt->rt6i_flags |= RTF_PCPU; return pcpu_rt; @@ -2486,7 +2502,7 @@ restart: out: if (ret) - dst_hold(&ret->dst); + ip6_hold_safe(net, &ret, true); else ret = ip6_create_rt_rcu(rt); @@ -3303,7 +3319,8 @@ static int ip6_route_del(struct fib6_config *cfg, continue; if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol) continue; - fib6_info_hold(rt); + if (!fib6_info_hold_safe(rt)) + continue; rcu_read_unlock(); /* if gateway was specified only delete the one hop */ @@ -3409,6 +3426,9 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu rcu_read_lock(); from = rcu_dereference(rt->from); + /* This fib6_info_hold() is safe here because we hold reference to rt + * and rt already holds reference to fib6_info. + */ fib6_info_hold(from); rcu_read_unlock(); @@ -3470,7 +3490,8 @@ static struct fib6_info *rt6_get_route_info(struct net *net, continue; if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr)) continue; - fib6_info_hold(rt); + if (!fib6_info_hold_safe(rt)) + continue; break; } out: @@ -3530,8 +3551,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr)) break; } - if (rt) - fib6_info_hold(rt); + if (rt && !fib6_info_hold_safe(rt)) + rt = NULL; rcu_read_unlock(); return rt; } @@ -3579,8 +3600,8 @@ restart: struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL; if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) && - (!idev || idev->cnf.accept_ra != 2)) { - fib6_info_hold(rt); + (!idev || idev->cnf.accept_ra != 2) && + fib6_info_hold_safe(rt)) { rcu_read_unlock(); ip6_del_rt(net, rt); goto restart; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7efa9fd7e109..03e6b7a2bc53 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -938,7 +938,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) &tcp_hashinfo, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, - ntohs(th->source), tcp_v6_iif(skb), + ntohs(th->source), + tcp_v6_iif_l3_slave(skb), tcp_v6_sdif(skb)); if (!sk1) goto out; @@ -1609,7 +1610,8 @@ do_time_wait: skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), tcp_v6_iif(skb), + ntohs(th->dest), + tcp_v6_iif_l3_slave(skb), sdif); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a16ba568e2a3..64742f2765c4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2370,11 +2370,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, sdata->control_port_over_nl80211)) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); bool noencrypt = status->flag & RX_FLAG_DECRYPTED; - struct ethhdr *ehdr = eth_hdr(skb); - cfg80211_rx_control_port(dev, skb->data, skb->len, - ehdr->h_source, - be16_to_cpu(skb->protocol), noencrypt); + cfg80211_rx_control_port(dev, skb, noencrypt); dev_kfree_skb(skb); } else { /* deliver to local stack */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3e68132a41fa..88efda7c9f8a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2140,7 +2140,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (!sta->uploaded) continue; - if (sta->sdata->vif.type != NL80211_IFTYPE_AP) + if (sta->sdata->vif.type != NL80211_IFTYPE_AP && + sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN) continue; for (state = IEEE80211_STA_NOTEXIST; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index f476d116c816..8c58f96b59e7 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -244,14 +244,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = * We currently ignore Sync packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, [DCCP_PKT_SYNCACK] = { /* * We currently ignore SyncAck packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, }, [CT_DCCP_ROLE_SERVER] = { @@ -372,14 +372,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = * We currently ignore Sync packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, [DCCP_PKT_SYNCACK] = { /* * We currently ignore SyncAck packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, + sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, }, }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c0fb2bcd30fe..f18085639807 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -76,6 +76,7 @@ static void nft_ctx_init(struct nft_ctx *ctx, { ctx->net = net; ctx->family = family; + ctx->level = 0; ctx->table = table; ctx->chain = chain; ctx->nla = nla; @@ -1651,7 +1652,6 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, struct nft_base_chain *basechain; struct nft_stats *stats = NULL; struct nft_chain_hook hook; - const struct nlattr *name; struct nf_hook_ops *ops; struct nft_trans *trans; int err; @@ -1700,12 +1700,11 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, return PTR_ERR(stats); } + err = -ENOMEM; trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN, sizeof(struct nft_trans_chain)); - if (trans == NULL) { - free_percpu(stats); - return -ENOMEM; - } + if (trans == NULL) + goto err; nft_trans_chain_stats(trans) = stats; nft_trans_chain_update(trans) = true; @@ -1715,19 +1714,37 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, else nft_trans_chain_policy(trans) = -1; - name = nla[NFTA_CHAIN_NAME]; - if (nla[NFTA_CHAIN_HANDLE] && name) { - nft_trans_chain_name(trans) = - nla_strdup(name, GFP_KERNEL); - if (!nft_trans_chain_name(trans)) { - kfree(trans); - free_percpu(stats); - return -ENOMEM; + if (nla[NFTA_CHAIN_HANDLE] && + nla[NFTA_CHAIN_NAME]) { + struct nft_trans *tmp; + char *name; + + err = -ENOMEM; + name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL); + if (!name) + goto err; + + err = -EEXIST; + list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) { + if (tmp->msg_type == NFT_MSG_NEWCHAIN && + tmp->ctx.table == table && + nft_trans_chain_update(tmp) && + nft_trans_chain_name(tmp) && + strcmp(name, nft_trans_chain_name(tmp)) == 0) { + kfree(name); + goto err; + } } + + nft_trans_chain_name(trans) = name; } list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; +err: + free_percpu(stats); + kfree(trans); + return err; } static int nf_tables_newchain(struct net *net, struct sock *nlsk, @@ -2309,6 +2326,39 @@ done: return skb->len; } +static int nf_tables_dump_rules_start(struct netlink_callback *cb) +{ + const struct nlattr * const *nla = cb->data; + struct nft_rule_dump_ctx *ctx = NULL; + + if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) { + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); + if (!ctx) + return -ENOMEM; + + if (nla[NFTA_RULE_TABLE]) { + ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], + GFP_ATOMIC); + if (!ctx->table) { + kfree(ctx); + return -ENOMEM; + } + } + if (nla[NFTA_RULE_CHAIN]) { + ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], + GFP_ATOMIC); + if (!ctx->chain) { + kfree(ctx->table); + kfree(ctx); + return -ENOMEM; + } + } + } + + cb->data = ctx; + return 0; +} + static int nf_tables_dump_rules_done(struct netlink_callback *cb) { struct nft_rule_dump_ctx *ctx = cb->data; @@ -2338,38 +2388,13 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { + .start= nf_tables_dump_rules_start, .dump = nf_tables_dump_rules, .done = nf_tables_dump_rules_done, .module = THIS_MODULE, + .data = (void *)nla, }; - if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) { - struct nft_rule_dump_ctx *ctx; - - ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); - if (!ctx) - return -ENOMEM; - - if (nla[NFTA_RULE_TABLE]) { - ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], - GFP_ATOMIC); - if (!ctx->table) { - kfree(ctx); - return -ENOMEM; - } - } - if (nla[NFTA_RULE_CHAIN]) { - ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], - GFP_ATOMIC); - if (!ctx->chain) { - kfree(ctx->table); - kfree(ctx); - return -ENOMEM; - } - } - c.data = ctx; - } - return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } @@ -2440,6 +2465,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) struct nft_rule *rule; int err; + if (ctx->level == NFT_JUMP_STACK_SIZE) + return -EMLINK; + list_for_each_entry(rule, &chain->rules, list) { if (!nft_is_active_next(ctx->net, rule)) continue; @@ -3227,6 +3255,18 @@ done: return skb->len; } +static int nf_tables_dump_sets_start(struct netlink_callback *cb) +{ + struct nft_ctx *ctx_dump = NULL; + + ctx_dump = kmemdup(cb->data, sizeof(*ctx_dump), GFP_ATOMIC); + if (ctx_dump == NULL) + return -ENOMEM; + + cb->data = ctx_dump; + return 0; +} + static int nf_tables_dump_sets_done(struct netlink_callback *cb) { kfree(cb->data); @@ -3254,18 +3294,12 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { + .start = nf_tables_dump_sets_start, .dump = nf_tables_dump_sets, .done = nf_tables_dump_sets_done, + .data = &ctx, .module = THIS_MODULE, }; - struct nft_ctx *ctx_dump; - - ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_ATOMIC); - if (ctx_dump == NULL) - return -ENOMEM; - - *ctx_dump = ctx; - c.data = ctx_dump; return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } @@ -3915,6 +3949,15 @@ nla_put_failure: return -ENOSPC; } +static int nf_tables_dump_set_start(struct netlink_callback *cb) +{ + struct nft_set_dump_ctx *dump_ctx = cb->data; + + cb->data = kmemdup(dump_ctx, sizeof(*dump_ctx), GFP_ATOMIC); + + return cb->data ? 0 : -ENOMEM; +} + static int nf_tables_dump_set_done(struct netlink_callback *cb) { kfree(cb->data); @@ -4068,20 +4111,17 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { + .start = nf_tables_dump_set_start, .dump = nf_tables_dump_set, .done = nf_tables_dump_set_done, .module = THIS_MODULE, }; - struct nft_set_dump_ctx *dump_ctx; - - dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_ATOMIC); - if (!dump_ctx) - return -ENOMEM; - - dump_ctx->set = set; - dump_ctx->ctx = ctx; + struct nft_set_dump_ctx dump_ctx = { + .set = set, + .ctx = ctx, + }; - c.data = dump_ctx; + c.data = &dump_ctx; return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } @@ -5041,38 +5081,42 @@ done: return skb->len; } -static int nf_tables_dump_obj_done(struct netlink_callback *cb) +static int nf_tables_dump_obj_start(struct netlink_callback *cb) { - struct nft_obj_filter *filter = cb->data; + const struct nlattr * const *nla = cb->data; + struct nft_obj_filter *filter = NULL; - if (filter) { - kfree(filter->table); - kfree(filter); + if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) { + filter = kzalloc(sizeof(*filter), GFP_ATOMIC); + if (!filter) + return -ENOMEM; + + if (nla[NFTA_OBJ_TABLE]) { + filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); + if (!filter->table) { + kfree(filter); + return -ENOMEM; + } + } + + if (nla[NFTA_OBJ_TYPE]) + filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); } + cb->data = filter; return 0; } -static struct nft_obj_filter * -nft_obj_filter_alloc(const struct nlattr * const nla[]) +static int nf_tables_dump_obj_done(struct netlink_callback *cb) { - struct nft_obj_filter *filter; - - filter = kzalloc(sizeof(*filter), GFP_ATOMIC); - if (!filter) - return ERR_PTR(-ENOMEM); + struct nft_obj_filter *filter = cb->data; - if (nla[NFTA_OBJ_TABLE]) { - filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); - if (!filter->table) { - kfree(filter); - return ERR_PTR(-ENOMEM); - } + if (filter) { + kfree(filter->table); + kfree(filter); } - if (nla[NFTA_OBJ_TYPE]) - filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - return filter; + return 0; } /* called with rcu_read_lock held */ @@ -5093,21 +5137,13 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { + .start = nf_tables_dump_obj_start, .dump = nf_tables_dump_obj, .done = nf_tables_dump_obj_done, .module = THIS_MODULE, + .data = (void *)nla, }; - if (nla[NFTA_OBJ_TABLE] || - nla[NFTA_OBJ_TYPE]) { - struct nft_obj_filter *filter; - - filter = nft_obj_filter_alloc(nla); - if (IS_ERR(filter)) - return -ENOMEM; - - c.data = filter; - } return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } @@ -5386,8 +5422,6 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx, flowtable->ops[i].priv = &flowtable->data; flowtable->ops[i].hook = flowtable->data.type->hook; flowtable->ops[i].dev = dev_array[i]; - flowtable->dev_name[i] = kstrdup(dev_array[i]->name, - GFP_KERNEL); } return err; @@ -5545,10 +5579,8 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, err6: i = flowtable->ops_len; err5: - for (k = i - 1; k >= 0; k--) { - kfree(flowtable->dev_name[k]); + for (k = i - 1; k >= 0; k--) nf_unregister_net_hook(net, &flowtable->ops[k]); - } kfree(flowtable->ops); err4: @@ -5647,9 +5679,10 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; for (i = 0; i < flowtable->ops_len; i++) { - if (flowtable->dev_name[i][0] && - nla_put_string(skb, NFTA_DEVICE_NAME, - flowtable->dev_name[i])) + const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev); + + if (dev && + nla_put_string(skb, NFTA_DEVICE_NAME, dev->name)) goto nla_put_failure; } nla_nest_end(skb, nest_devs); @@ -5716,37 +5749,39 @@ done: return skb->len; } -static int nf_tables_dump_flowtable_done(struct netlink_callback *cb) +static int nf_tables_dump_flowtable_start(struct netlink_callback *cb) { - struct nft_flowtable_filter *filter = cb->data; + const struct nlattr * const *nla = cb->data; + struct nft_flowtable_filter *filter = NULL; - if (!filter) - return 0; + if (nla[NFTA_FLOWTABLE_TABLE]) { + filter = kzalloc(sizeof(*filter), GFP_ATOMIC); + if (!filter) + return -ENOMEM; - kfree(filter->table); - kfree(filter); + filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE], + GFP_ATOMIC); + if (!filter->table) { + kfree(filter); + return -ENOMEM; + } + } + cb->data = filter; return 0; } -static struct nft_flowtable_filter * -nft_flowtable_filter_alloc(const struct nlattr * const nla[]) +static int nf_tables_dump_flowtable_done(struct netlink_callback *cb) { - struct nft_flowtable_filter *filter; + struct nft_flowtable_filter *filter = cb->data; - filter = kzalloc(sizeof(*filter), GFP_ATOMIC); if (!filter) - return ERR_PTR(-ENOMEM); + return 0; - if (nla[NFTA_FLOWTABLE_TABLE]) { - filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE], - GFP_ATOMIC); - if (!filter->table) { - kfree(filter); - return ERR_PTR(-ENOMEM); - } - } - return filter; + kfree(filter->table); + kfree(filter); + + return 0; } /* called with rcu_read_lock held */ @@ -5766,20 +5801,13 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { + .start = nf_tables_dump_flowtable_start, .dump = nf_tables_dump_flowtable, .done = nf_tables_dump_flowtable_done, .module = THIS_MODULE, + .data = (void *)nla, }; - if (nla[NFTA_FLOWTABLE_TABLE]) { - struct nft_flowtable_filter *filter; - - filter = nft_flowtable_filter_alloc(nla); - if (IS_ERR(filter)) - return -ENOMEM; - - c.data = filter; - } return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } @@ -5849,6 +5877,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) kfree(flowtable->name); flowtable->data.type->free(&flowtable->data); module_put(flowtable->data.type->owner); + kfree(flowtable); } static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, @@ -5891,7 +5920,6 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev, continue; nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]); - flowtable->dev_name[i][0] = '\0'; flowtable->ops[i].dev = NULL; break; } @@ -6152,6 +6180,9 @@ static void nft_commit_release(struct nft_trans *trans) case NFT_MSG_DELTABLE: nf_tables_table_destroy(&trans->ctx); break; + case NFT_MSG_NEWCHAIN: + kfree(nft_trans_chain_name(trans)); + break; case NFT_MSG_DELCHAIN: nf_tables_chain_destroy(&trans->ctx); break; @@ -6381,13 +6412,15 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE); break; case NFT_MSG_NEWCHAIN: - if (nft_trans_chain_update(trans)) + if (nft_trans_chain_update(trans)) { nft_chain_commit_update(trans); - else + nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); + /* trans destroyed after rcu grace period */ + } else { nft_clear(net, trans->ctx.chain); - - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); - nft_trans_destroy(trans); + nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); + nft_trans_destroy(trans); + } break; case NFT_MSG_DELCHAIN: nft_chain_del(trans->ctx.chain); @@ -6538,7 +6571,7 @@ static int __nf_tables_abort(struct net *net) case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { free_percpu(nft_trans_chain_stats(trans)); - + kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { trans->ctx.table->use--; @@ -6918,13 +6951,6 @@ int nft_validate_register_store(const struct nft_ctx *ctx, err = nf_tables_check_loops(ctx, data->verdict.chain); if (err < 0) return err; - - if (ctx->chain->level + 1 > - data->verdict.chain->level) { - if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE) - return -EMLINK; - data->verdict.chain->level = ctx->chain->level + 1; - } } return 0; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 15adf8ca82c3..0777a93211e2 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -98,6 +98,7 @@ static int nft_immediate_validate(const struct nft_ctx *ctx, const struct nft_data **d) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + struct nft_ctx *pctx = (struct nft_ctx *)ctx; const struct nft_data *data; int err; @@ -109,9 +110,11 @@ static int nft_immediate_validate(const struct nft_ctx *ctx, switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: + pctx->level++; err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; + pctx->level--; break; default: break; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 42e6fadf1417..c2a1d84cdfc4 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -155,7 +155,9 @@ static int nft_lookup_validate_setelem(const struct nft_ctx *ctx, struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + struct nft_ctx *pctx = (struct nft_ctx *)ctx; const struct nft_data *data; + int err; if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) @@ -165,10 +167,17 @@ static int nft_lookup_validate_setelem(const struct nft_ctx *ctx, switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - return nft_chain_validate(ctx, data->verdict.chain); + pctx->level++; + err = nft_chain_validate(ctx, data->verdict.chain); + if (err < 0) + return err; + pctx->level--; + break; default: - return 0; + break; } + + return 0; } static int nft_lookup_validate(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 72ef35b51cac..90c3e7e6cacb 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -387,6 +387,7 @@ static void nft_rhash_destroy(const struct nft_set *set) struct nft_rhash *priv = nft_set_priv(set); cancel_delayed_work_sync(&priv->gc_work); + rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, (void *)set); } diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 1f8f257cb518..9873d734b494 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -381,7 +381,7 @@ static void nft_rbtree_gc(struct work_struct *work) gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); if (!gcb) - goto out; + break; atomic_dec(&set->nelems); nft_set_gc_batch_add(gcb, rbe); @@ -390,10 +390,12 @@ static void nft_rbtree_gc(struct work_struct *work) rbe = rb_entry(prev, struct nft_rbtree_elem, node); atomic_dec(&set->nelems); nft_set_gc_batch_add(gcb, rbe); + prev = NULL; } node = rb_next(node); + if (!node) + break; } -out: if (gcb) { for (i = 0; i < gcb->head.cnt; i++) { rbe = gcb->elems[i]; @@ -440,6 +442,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) struct rb_node *node; cancel_delayed_work_sync(&priv->gc_work); + rcu_barrier(); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 0c2d029c9d4c..03f1370f5db1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -631,6 +631,9 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags, return NULL; } + if (sk->sk_shutdown & RCV_SHUTDOWN) + return NULL; + if (sock_flag(sk, SOCK_DONE)) return NULL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e4e5f025d16b..5fb9b7dd9831 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4466,6 +4466,7 @@ static int parse_station_flags(struct genl_info *info, params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHORIZED); + break; default: return -EINVAL; } @@ -15029,20 +15030,24 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, EXPORT_SYMBOL(cfg80211_mgmt_tx_status); static int __nl80211_rx_control_port(struct net_device *dev, - const u8 *buf, size_t len, - const u8 *addr, u16 proto, + struct sk_buff *skb, bool unencrypted, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct ethhdr *ehdr = eth_hdr(skb); + const u8 *addr = ehdr->h_source; + u16 proto = be16_to_cpu(skb->protocol); struct sk_buff *msg; void *hdr; + struct nlattr *frame; + u32 nlportid = READ_ONCE(wdev->conn_owner_nlportid); if (!nlportid) return -ENOENT; - msg = nlmsg_new(100 + len, gfp); + msg = nlmsg_new(100 + skb->len, gfp); if (!msg) return -ENOMEM; @@ -15056,13 +15061,17 @@ static int __nl80211_rx_control_port(struct net_device *dev, nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || - nla_put(msg, NL80211_ATTR_FRAME, len, buf) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) || (unencrypted && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))) goto nla_put_failure; + frame = nla_reserve(msg, NL80211_ATTR_FRAME, skb->len); + if (!frame) + goto nla_put_failure; + + skb_copy_bits(skb, 0, nla_data(frame), skb->len); genlmsg_end(msg, hdr); return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); @@ -15073,14 +15082,12 @@ static int __nl80211_rx_control_port(struct net_device *dev, } bool cfg80211_rx_control_port(struct net_device *dev, - const u8 *buf, size_t len, - const u8 *addr, u16 proto, bool unencrypted) + struct sk_buff *skb, bool unencrypted) { int ret; - trace_cfg80211_rx_control_port(dev, buf, len, addr, proto, unencrypted); - ret = __nl80211_rx_control_port(dev, buf, len, addr, proto, - unencrypted, GFP_ATOMIC); + trace_cfg80211_rx_control_port(dev, skb, unencrypted); + ret = __nl80211_rx_control_port(dev, skb, unencrypted, GFP_ATOMIC); trace_cfg80211_return_bool(ret == 0); return ret == 0; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index bbe6298e4bb9..4fc66a117b7d 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2240,7 +2240,9 @@ static void wiphy_update_regulatory(struct wiphy *wiphy, * as some drivers used this to restore its orig_* reg domain. */ if (initiator == NL80211_REGDOM_SET_BY_CORE && - wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) + wiphy->regulatory_flags & REGULATORY_CUSTOM_REG && + !(wiphy->regulatory_flags & + REGULATORY_WIPHY_SELF_MANAGED)) reg_call_notifier(wiphy, lr); return; } @@ -2787,26 +2789,6 @@ static void notify_self_managed_wiphys(struct regulatory_request *request) } } -static bool reg_only_self_managed_wiphys(void) -{ - struct cfg80211_registered_device *rdev; - struct wiphy *wiphy; - bool self_managed_found = false; - - ASSERT_RTNL(); - - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - wiphy = &rdev->wiphy; - if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) - self_managed_found = true; - else - return false; - } - - /* make sure at least one self-managed wiphy exists */ - return self_managed_found; -} - /* * Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* * Regulatory hints come on a first come first serve basis and we @@ -2839,10 +2821,6 @@ static void reg_process_pending_hints(void) spin_unlock(®_requests_lock); notify_self_managed_wiphys(reg_request); - if (reg_only_self_managed_wiphys()) { - reg_free_request(reg_request); - return; - } reg_process_hint(reg_request); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 2b417a2fe63f..7c73510b161f 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2627,23 +2627,25 @@ TRACE_EVENT(cfg80211_mgmt_tx_status, ); TRACE_EVENT(cfg80211_rx_control_port, - TP_PROTO(struct net_device *netdev, const u8 *buf, size_t len, - const u8 *addr, u16 proto, bool unencrypted), - TP_ARGS(netdev, buf, len, addr, proto, unencrypted), + TP_PROTO(struct net_device *netdev, struct sk_buff *skb, + bool unencrypted), + TP_ARGS(netdev, skb, unencrypted), TP_STRUCT__entry( NETDEV_ENTRY - MAC_ENTRY(addr) + __field(int, len) + MAC_ENTRY(from) __field(u16, proto) __field(bool, unencrypted) ), TP_fast_assign( NETDEV_ASSIGN; - MAC_ASSIGN(addr, addr); - __entry->proto = proto; + __entry->len = skb->len; + MAC_ASSIGN(from, eth_hdr(skb)->h_source); + __entry->proto = be16_to_cpu(skb->protocol); __entry->unencrypted = unencrypted; ), - TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT " proto: 0x%x, unencrypted: %s", - NETDEV_PR_ARG, MAC_PR_ARG(addr), + TP_printk(NETDEV_PR_FMT ", len=%d, " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", + NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(from), __entry->proto, BOOL_TO_STR(__entry->unencrypted)) ); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index b432daea4520..b3a0709ea7ed 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -215,6 +215,14 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) int err; int fd; + if (argc < 3) { + p_err("too few arguments, id ID and FILE path is required"); + return -1; + } else if (argc > 3) { + p_err("too many arguments"); + return -1; + } + if (!is_prefix(*argv, "id")) { p_err("expected 'id' got %s", *argv); return -1; @@ -228,9 +236,6 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) } NEXT_ARG(); - if (argc != 1) - usage(); - fd = get_fd_by_id(id); if (fd < 0) { p_err("can't get prog by id (%u): %s", id, strerror(errno)); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0d1acb704f64..7ec85d567598 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -519,10 +519,12 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_flags = SHF_ALLOC; - /* Add section name to .shstrtab */ + /* Add section name to .shstrtab (or .strtab for Clang) */ shstrtab = find_section_by_name(elf, ".shstrtab"); + if (!shstrtab) + shstrtab = find_section_by_name(elf, ".strtab"); if (!shstrtab) { - WARN("can't find .shstrtab section"); + WARN("can't find .shstrtab or .strtab section"); return NULL; } diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 478bf1bcbbf5..5169a97eb68b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -106,7 +106,7 @@ $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) -BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --version 2>&1 | grep LLVM) +BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') ifneq ($(BTF_LLC_PROBE),) ifneq ($(BTF_PAHOLE_PROBE),) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index f5f7bcc96046..41106d9d5cc7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -12005,6 +12005,46 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_XDP, }, { + "xadd/w check whether src/dst got mangled, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), + BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 3, + }, + { + "xadd/w check whether src/dst got mangled, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), + BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 3, + }, + { "bpf_get_stack return R0 within range", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), |