diff options
Diffstat (limited to 'include')
226 files changed, 4309 insertions, 1744 deletions
diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h index fe386fc6e85e..6bb8cd45f53b 100644 --- a/include/asm-generic/cputime_jiffies.h +++ b/include/asm-generic/cputime_jiffies.h @@ -7,7 +7,6 @@ typedef unsigned long __nocast cputime_t; #define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) -#define cputime_to_scaled(__ct) (__ct) #define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) typedef u64 __nocast cputime64_t; diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index a84e28e0c634..4e3b18e559b1 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -34,7 +34,6 @@ typedef u64 __nocast cputime64_t; */ #define cputime_to_jiffies(__ct) \ cputime_div(__ct, NSEC_PER_SEC / HZ) -#define cputime_to_scaled(__ct) (__ct) #define jiffies_to_cputime(__jif) \ (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) #define cputime64_to_jiffies64(__ct) \ diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h deleted file mode 100644 index c54829d3de37..000000000000 --- a/include/asm-generic/mutex-dec.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * include/asm-generic/mutex-dec.h - * - * Generic implementation of the mutex fastpath, based on atomic - * decrement/increment. - */ -#ifndef _ASM_GENERIC_MUTEX_DEC_H -#define _ASM_GENERIC_MUTEX_DEC_H - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call <fail_fn> if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_dec_return_acquire(count) < 0)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_dec_return_acquire(count) < 0)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>. - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - * - * If the implementation sets it to a value of lower than 1, then the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_inc_return_release(count) <= 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - * - * If the architecture has no effective trylock variant, it should call the - * <fail_fn> spinlock-based trylock variant unconditionally. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1)) - return 1; - return 0; -} - -#endif diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h deleted file mode 100644 index 61069ed334e2..000000000000 --- a/include/asm-generic/mutex-null.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * include/asm-generic/mutex-null.h - * - * Generic implementation of the mutex fastpath, based on NOP :-) - * - * This is used by the mutex-debugging infrastructure, but it can also - * be used by architectures that (for whatever reason) want to use the - * spinlock based slowpath. - */ -#ifndef _ASM_GENERIC_MUTEX_NULL_H -#define _ASM_GENERIC_MUTEX_NULL_H - -#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count) -#define __mutex_fastpath_lock_retval(count) (-1) -#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count) -#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count) -#define __mutex_slowpath_needs_to_unlock() 1 - -#endif diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h deleted file mode 100644 index 3269ec4e195f..000000000000 --- a/include/asm-generic/mutex-xchg.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * include/asm-generic/mutex-xchg.h - * - * Generic implementation of the mutex fastpath, based on xchg(). - * - * NOTE: An xchg based implementation might be less optimal than an atomic - * decrement/increment based implementation. If your architecture - * has a reasonable atomic dec/inc then you should probably use - * asm-generic/mutex-dec.h instead, or you could open-code an - * optimized version in asm/mutex.h. - */ -#ifndef _ASM_GENERIC_MUTEX_XCHG_H -#define _ASM_GENERIC_MUTEX_XCHG_H - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call <fail_fn> if it - * wasn't 1 originally. This function MUST leave the value lower than 1 - * even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_xchg(count, 0) != 1)) - /* - * We failed to acquire the lock, so mark it contended - * to ensure that any waiting tasks are woken up by the - * unlock slow path. - */ - if (likely(atomic_xchg_acquire(count, -1) != 1)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_xchg_acquire(count, 0) != 1)) - if (likely(atomic_xchg(count, -1) != 1)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * try to promote the mutex from 0 to 1. if it wasn't 0, call <function> - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than one. - * If the implementation sets it to a value of lower than one, the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_xchg_release(count, 1) != 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 0 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: spinlock based trylock implementation - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - * - * If the architecture has no effective trylock variant, it should call the - * <fail_fn> spinlock-based trylock variant unconditionally. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int prev; - - if (atomic_read(count) != 1) - return 0; - - prev = atomic_xchg_acquire(count, 0); - if (unlikely(prev < 0)) { - /* - * The lock was marked contended so we must restore that - * state. If while doing so we get back a prev value of 1 - * then we just own it. - * - * [ In the rare case of the mutex going to 1, to 0, to -1 - * and then back to 0 in this few-instructions window, - * this has the potential to trigger the slowpath for the - * owner's unlock path needlessly, but that's not a problem - * in practice. ] - */ - prev = atomic_xchg_acquire(count, prev); - if (prev < 0) - prev = 0; - } - - return prev; -} - -#endif diff --git a/include/asm-generic/mutex.h b/include/asm-generic/mutex.h deleted file mode 100644 index fe91ab502793..000000000000 --- a/include/asm-generic/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_GENERIC_MUTEX_H -#define __ASM_GENERIC_MUTEX_H -/* - * Pull in the generic implementation for the mutex fastpath, - * which is a reasonable default on many architectures. - */ - -#include <asm-generic/mutex-dec.h> -#endif /* __ASM_GENERIC_MUTEX_H */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index c4f8fd2fd384..18af2bcefe6a 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -558,10 +558,9 @@ static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, * track_pfn_insert is called when a _new_ single pfn is established * by vm_insert_pfn(). */ -static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - pfn_t pfn) +static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, + pfn_t pfn) { - return 0; } /* @@ -593,8 +592,8 @@ static inline void untrack_pfn_moved(struct vm_area_struct *vma) extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long addr, unsigned long size); -extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - pfn_t pfn); +extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, + pfn_t pfn); extern int track_pfn_copy(struct vm_area_struct *vma); extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size); @@ -653,18 +652,9 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) } #endif -#ifndef pmd_move_must_withdraw -static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, - spinlock_t *old_pmd_ptl) -{ - /* - * With split pmd lock we also need to move preallocated - * PTE page table if new_pmd is on different PMD page table. - */ - return new_pmd_ptl != old_pmd_ptl; -} +#ifndef arch_needs_pgtable_deposit +#define arch_needs_pgtable_deposit() (false) #endif - /* * This function is meant to be used by sites walking pagetables with * the mmap_sem hold in read mode to protect against MADV_DONTNEED and diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index c6d667187608..7eed8cf3130a 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -107,11 +107,6 @@ struct mmu_gather { struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; unsigned int batch_count; - /* - * __tlb_adjust_range will track the new addr here, - * that that we can adjust the range after the flush - */ - unsigned long addr; int page_size; }; @@ -125,16 +120,11 @@ extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); static inline void __tlb_adjust_range(struct mmu_gather *tlb, - unsigned long address) + unsigned long address, + unsigned int range_size) { tlb->start = min(tlb->start, address); - tlb->end = max(tlb->end, address + PAGE_SIZE); - /* - * Track the last address with which we adjusted the range. This - * will be used later to adjust again after a mmu_flush due to - * failed __tlb_remove_page - */ - tlb->addr = address; + tlb->end = max(tlb->end, address + range_size); } static inline void __tlb_reset_range(struct mmu_gather *tlb) @@ -150,15 +140,11 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) static inline void tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) { - if (__tlb_remove_page_size(tlb, page, page_size)) { + if (__tlb_remove_page_size(tlb, page, page_size)) tlb_flush_mmu(tlb); - tlb->page_size = page_size; - __tlb_adjust_range(tlb, tlb->addr); - __tlb_remove_page_size(tlb, page, page_size); - } } -static bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { return __tlb_remove_page_size(tlb, page, PAGE_SIZE); } @@ -172,14 +158,21 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) return tlb_remove_page_size(tlb, page, PAGE_SIZE); } -static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *page) +#ifndef tlb_remove_check_page_size_change +#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change +static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, + unsigned int page_size) { - /* active->nr should be zero when we call this */ - VM_BUG_ON_PAGE(tlb->active->nr, page); - tlb->page_size = PAGE_SIZE; - __tlb_adjust_range(tlb, tlb->addr); - return __tlb_remove_page(tlb, page); + /* + * We don't care about page size change, just update + * mmu_gather page size here so that debug checks + * doesn't throw false warning. + */ +#ifdef CONFIG_DEBUG_VM + tlb->page_size = page_size; +#endif } +#endif /* * In the case of tlb vma handling, we can optimise these away in the @@ -215,10 +208,16 @@ static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *pa */ #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ - __tlb_adjust_range(tlb, address); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) +#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ + do { \ + __tlb_adjust_range(tlb, address, huge_page_size(h)); \ + __tlb_remove_tlb_entry(tlb, ptep, address); \ + } while (0) + /** * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation * This is a nop so far, because only x86 needs it. @@ -227,29 +226,47 @@ static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *pa #define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0) #endif -#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ - do { \ - __tlb_adjust_range(tlb, address); \ - __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ +#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ + do { \ + __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \ + __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) +/* + * For things like page tables caches (ie caching addresses "inside" the + * page tables, like x86 does), for legacy reasons, flushing an + * individual page had better flush the page table caches behind it. This + * is definitely how x86 works, for example. And if you have an + * architected non-legacy page table cache (which I'm not aware of + * anybody actually doing), you're going to have some architecturally + * explicit flushing for that, likely *separate* from a regular TLB entry + * flush, and thus you'd need more than just some range expansion.. + * + * So if we ever find an architecture + * that would want something that odd, I think it is up to that + * architecture to do its own odd thing, not cause pain for others + * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com + * + * For now w.r.t page table cache, mark the range_size as PAGE_SIZE + */ + #define pte_free_tlb(tlb, ptep, address) \ do { \ - __tlb_adjust_range(tlb, address); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #ifndef __ARCH_HAS_4LEVEL_HACK #define pud_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 61580b19f9f6..22f884c97387 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -124,6 +124,8 @@ struct drbg_state { struct skcipher_request *ctr_req; /* CTR mode request handle */ __u8 *ctr_null_value_buf; /* CTR mode unaligned buffer */ __u8 *ctr_null_value; /* CTR mode aligned zero buf */ + __u8 *outscratchpadbuf; /* CTR mode output scratchpad */ + __u8 *outscratchpad; /* CTR mode aligned outbuf */ struct completion ctr_completion; /* CTR mode async handler */ int ctr_async_err; /* CTR mode async error */ diff --git a/include/dt-bindings/net/mdio.h b/include/dt-bindings/net/mdio.h new file mode 100644 index 000000000000..99c6d903d439 --- /dev/null +++ b/include/dt-bindings/net/mdio.h @@ -0,0 +1,19 @@ +/* + * This header provides generic constants for ethernet MDIO bindings + */ + +#ifndef _DT_BINDINGS_NET_MDIO_H +#define _DT_BINDINGS_NET_MDIO_H + +/* + * EEE capability Advertisement + */ + +#define MDIO_EEE_100TX 0x0002 /* 100TX EEE cap */ +#define MDIO_EEE_1000T 0x0004 /* 1000T EEE cap */ +#define MDIO_EEE_10GT 0x0008 /* 10GT EEE cap */ +#define MDIO_EEE_1000KX 0x0010 /* 1000KX EEE cap */ +#define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */ +#define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */ + +#endif diff --git a/include/dt-bindings/net/mscc-phy-vsc8531.h b/include/dt-bindings/net/mscc-phy-vsc8531.h deleted file mode 100644 index 2383dd20ff43..000000000000 --- a/include/dt-bindings/net/mscc-phy-vsc8531.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Device Tree constants for Microsemi VSC8531 PHY - * - * Author: Nagaraju Lakkaraju - * - * License: Dual MIT/GPL - * Copyright (c) 2016 Microsemi Corporation - */ - -#ifndef _DT_BINDINGS_MSCC_VSC8531_H -#define _DT_BINDINGS_MSCC_VSC8531_H - -/* MAC interface Edge rate control VDDMAC in milli Volts */ -#define MSCC_VDDMAC_3300 3300 -#define MSCC_VDDMAC_2500 2500 -#define MSCC_VDDMAC_1800 1800 -#define MSCC_VDDMAC_1500 1500 -#define MSCC_VDDMAC_MAX 4 -#define MSCC_SLOWDOWN_MAX 8 - -#endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 61a3d90f32b3..051023756520 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -469,6 +469,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_CPCV2_SUPPORT 0x00000040 #define OSC_SB_PCLPI_SUPPORT 0x00000080 #define OSC_SB_OSLPI_SUPPORT 0x00000100 +#define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000 extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 9d8031257a90..c70aac13244a 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -10,7 +10,12 @@ enum alarmtimer_type { ALARM_REALTIME, ALARM_BOOTTIME, + /* Supported types end here */ ALARM_NUMTYPE, + + /* Used for tracing information. No usable types. */ + ALARM_REALTIME_FREEZER, + ALARM_BOOTTIME_FREEZER, }; enum alarmtimer_restart { diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c357f27d5483..0b5b1af35e5e 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -136,12 +136,13 @@ struct bdi_writeback { struct backing_dev_info { struct list_head bdi_list; unsigned long ra_pages; /* max readahead in PAGE_SIZE units */ - unsigned int capabilities; /* Device capabilities */ + unsigned long io_pages; /* max allowed IO size */ congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ char *name; + unsigned int capabilities; /* Device capabilities */ unsigned int min_ratio; unsigned int max_ratio, max_prop_frac; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h new file mode 100644 index 000000000000..7b6e5d168c95 --- /dev/null +++ b/include/linux/bpf-cgroup.h @@ -0,0 +1,92 @@ +#ifndef _BPF_CGROUP_H +#define _BPF_CGROUP_H + +#include <linux/jump_label.h> +#include <uapi/linux/bpf.h> + +struct sock; +struct cgroup; +struct sk_buff; + +#ifdef CONFIG_CGROUP_BPF + +extern struct static_key_false cgroup_bpf_enabled_key; +#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) + +struct cgroup_bpf { + /* + * Store two sets of bpf_prog pointers, one for programs that are + * pinned directly to this cgroup, and one for those that are effective + * when this cgroup is accessed. + */ + struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; + struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE]; +}; + +void cgroup_bpf_put(struct cgroup *cgrp); +void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); + +void __cgroup_bpf_update(struct cgroup *cgrp, + struct cgroup *parent, + struct bpf_prog *prog, + enum bpf_attach_type type); + +/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ +void cgroup_bpf_update(struct cgroup *cgrp, + struct bpf_prog *prog, + enum bpf_attach_type type); + +int __cgroup_bpf_run_filter_skb(struct sock *sk, + struct sk_buff *skb, + enum bpf_attach_type type); + +int __cgroup_bpf_run_filter_sk(struct sock *sk, + enum bpf_attach_type type); + +/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ +#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ + BPF_CGROUP_INET_INGRESS); \ + \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ + BPF_CGROUP_INET_EGRESS); \ + } \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk) { \ + __ret = __cgroup_bpf_run_filter_sk(sk, \ + BPF_CGROUP_INET_SOCK_CREATE); \ + } \ + __ret; \ +}) + +#else + +struct cgroup_bpf {}; +static inline void cgroup_bpf_put(struct cgroup *cgrp) {} +static inline void cgroup_bpf_inherit(struct cgroup *cgrp, + struct cgroup *parent) {} + +#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) + +#endif /* CONFIG_CGROUP_BPF */ + +#endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c201017b5730..8796ff03f472 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -216,6 +216,7 @@ u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +void bpf_prog_calc_digest(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -233,13 +234,14 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); -struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i); -struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); +struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); +void bpf_prog_sub(struct bpf_prog *prog, int i); +struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); -struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref); +struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); @@ -298,15 +300,21 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, { return ERR_PTR(-EOPNOTSUPP); } -static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) +static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, + int i) { return ERR_PTR(-EOPNOTSUPP); } +static inline void bpf_prog_sub(struct bpf_prog *prog, int i) +{ +} + static inline void bpf_prog_put(struct bpf_prog *prog) { } -static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) + +static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog) { return ERR_PTR(-EOPNOTSUPP); } @@ -319,6 +327,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7035b997aaa5..a13b031dc6b8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -14,22 +14,16 @@ * are obviously wrong for any sort of memory access. */ #define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) -#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -1 struct bpf_reg_state { enum bpf_reg_type type; - /* - * Used to determine if any memory access using this register will - * result in a bad access. - */ - u64 min_value, max_value; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; /* valid when type == PTR_TO_PACKET* */ struct { - u32 id; u16 off; u16 range; }; @@ -39,6 +33,13 @@ struct bpf_reg_state { */ struct bpf_map *map_ptr; }; + u32 id; + /* Used to determine if any memory access using this register will + * result in a bad access. These two fields must be last. + * See states_equal() + */ + s64 min_value; + u64 max_value; }; enum bpf_stack_slot_type { diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index e3354b74286c..4f7d8be9ddbf 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -13,11 +13,13 @@ #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 +#define PHY_ID_BCM54810 0x03625d00 #define PHY_ID_BCM5482 0x0143bcb0 #define PHY_ID_BCM5411 0x00206070 #define PHY_ID_BCM5421 0x002060e0 #define PHY_ID_BCM5464 0x002060b0 #define PHY_ID_BCM5461 0x002060c0 +#define PHY_ID_BCM54612E 0x03625e60 #define PHY_ID_BCM54616S 0x03625d10 #define PHY_ID_BCM57780 0x03625d90 @@ -55,6 +57,7 @@ #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 + /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) #define PHY_BRCM_7XXX_PATCH(x) ((x) & 0xff) @@ -105,11 +108,15 @@ #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 #define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 +#define MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 +#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN (1 << 8) +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN (1 << 4) -#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 /* * Broadcom LED source encodings. These are used in BCM5461, BCM5481, @@ -124,6 +131,7 @@ #define BCM_LED_SRC_INTR 0x6 #define BCM_LED_SRC_QUALITY 0x7 #define BCM_LED_SRC_RCVLED 0x8 +#define BCM_LED_SRC_WIRESPEED 0x9 #define BCM_LED_SRC_MULTICOLOR1 0xa #define BCM_LED_SRC_OPENSHORT 0xb #define BCM_LED_SRC_OFF 0xe /* Tied high */ @@ -135,6 +143,14 @@ * Shadow values go into bits [14:10] of register 0x1c to select a shadow * register to access. */ + +/* 00100: Reserved control register 2 */ +#define BCM54XX_SHD_SCR2 0x04 +#define BCM54XX_SHD_SCR2_WSPD_RTRY_DIS 0x100 +#define BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT 2 +#define BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET 2 +#define BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK 0x7 + /* 00101: Spare Control Register 3 */ #define BCM54XX_SHD_SCR3 0x05 #define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 @@ -189,6 +205,12 @@ #define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ #define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ +/* BCM54810 Registers */ +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL (MII_BCM54XX_EXP_SEL_ER + 0x90) +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) +#define BCM54810_SHD_CLK_CTL 0x3 +#define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ @@ -222,6 +244,9 @@ #define LPI_FEATURE_EN_DIG1000X 0x4000 /* Core register definitions*/ +#define MII_BRCM_CORE_BASE12 0x12 +#define MII_BRCM_CORE_BASE13 0x13 +#define MII_BRCM_CORE_BASE14 0x14 #define MII_BRCM_CORE_BASE1E 0x1E #define MII_BRCM_CORE_EXPB0 0xB0 #define MII_BRCM_CORE_EXPB1 0xB1 diff --git a/include/linux/bug.h b/include/linux/bug.h index 292d6a10b0c2..baff2e8fc8a8 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -121,4 +121,21 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, } #endif /* CONFIG_GENERIC_BUG */ + +/* + * Since detected data corruption should stop operation on the affected + * structures, this returns false if the corruption condition is found. + */ +#define CHECK_DATA_CORRUPTION(condition, fmt, ...) \ + do { \ + if (unlikely(condition)) { \ + if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ + pr_err(fmt, ##__VA_ARGS__); \ + BUG(); \ + } else \ + WARN(1, fmt, ##__VA_ARGS__); \ + return false; \ + } \ + } while (0) + #endif /* _LINUX_BUG_H */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 5b17de62c962..861b4677fc5b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -16,6 +16,7 @@ #include <linux/percpu-refcount.h> #include <linux/percpu-rwsem.h> #include <linux/workqueue.h> +#include <linux/bpf-cgroup.h> #ifdef CONFIG_CGROUPS @@ -300,6 +301,9 @@ struct cgroup { /* used to schedule release agent */ struct work_struct release_agent_work; + /* used to store eBPF programs */ + struct cgroup_bpf bpf; + /* ids of the ancestors at each level including self */ int ancestor_ids[]; }; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 08398182f56e..65602d395a52 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -169,7 +169,10 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * - * Converts cycles to nanoseconds, using the given mult and shift. + * Converts clocksource cycles to nanoseconds, using the given @mult and @shift. + * The code is optimized for performance and is not intended to work + * with absolute clocksource cycles (as those will easily overflow), + * but is only intended to be used with relative (delta) clocksource cycles. * * XXX - This could use some mult_lxl_ll() asm optimization */ diff --git a/include/linux/cma.h b/include/linux/cma.h index 29f9e774ab76..6f0a91b37f68 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -1,6 +1,9 @@ #ifndef __CMA_H__ #define __CMA_H__ +#include <linux/init.h> +#include <linux/types.h> + /* * There is always at least global CMA area and a few optional * areas configured in kernel .config. diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 432f5c97e18f..0444b1336268 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -21,7 +21,7 @@ * clobbered. The issue is as follows: while the inline asm might * access any memory it wants, the compiler could have fit all of * @ptr into memory registers instead, and since @ptr never escaped - * from that, it proofed that the inline asm wasn't touching any of + * from that, it proved that the inline asm wasn't touching any of * it. This version works well with both compilers, i.e. we're telling * the compiler that the inline asm absolutely may see the contents * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 @@ -263,7 +263,9 @@ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */ -#if GCC_VERSION >= 50000 +#if GCC_VERSION >= 70000 +#define KASAN_ABI_VERSION 5 +#elif GCC_VERSION >= 50000 #define KASAN_ABI_VERSION 4 #elif GCC_VERSION >= 40902 #define KASAN_ABI_VERSION 3 diff --git a/include/linux/cpu.h b/include/linux/cpu.h index b886dc17f2f3..e571128ad99a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -93,22 +93,16 @@ extern bool cpuhp_tasks_frozen; { .notifier_call = fn, .priority = pri }; \ __register_cpu_notifier(&fn##_nb); \ } -#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); extern int __register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); extern void __unregister_cpu_notifier(struct notifier_block *nb); -#else -#ifndef MODULE -extern int register_cpu_notifier(struct notifier_block *nb); -extern int __register_cpu_notifier(struct notifier_block *nb); -#else +#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) + static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; @@ -118,7 +112,6 @@ static inline int __register_cpu_notifier(struct notifier_block *nb) { return 0; } -#endif static inline void unregister_cpu_notifier(struct notifier_block *nb) { diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index afe641c02dca..22acee76cf4c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -16,9 +16,11 @@ enum cpuhp_state { CPUHP_PERF_SUPERH, CPUHP_X86_HPET_DEAD, CPUHP_X86_APB_DEAD, + CPUHP_X86_MCE_DEAD, CPUHP_VIRT_NET_DEAD, CPUHP_SLUB_DEAD, CPUHP_MM_WRITEBACK_DEAD, + CPUHP_MM_VMSTAT_DEAD, CPUHP_SOFTIRQ_DEAD, CPUHP_NET_MVNETA_DEAD, CPUHP_CPUIDLE_DEAD, @@ -30,6 +32,15 @@ enum cpuhp_state { CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, + CPUHP_FS_BUFF_DEAD, + CPUHP_PRINTK_DEAD, + CPUHP_MM_MEMCQ_DEAD, + CPUHP_PERCPU_CNT_DEAD, + CPUHP_RADIX_DEAD, + CPUHP_PAGE_ALLOC_DEAD, + CPUHP_NET_DEV_DEAD, + CPUHP_PCI_XGENE_DEAD, + CPUHP_IOMMU_INTEL_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -49,6 +60,16 @@ enum cpuhp_state { CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, CPUHP_BLK_MQ_PREPARE, + CPUHP_NET_FLOW_PREPARE, + CPUHP_TOPOLOGY_PREPARE, + CPUHP_NET_IUCV_PREPARE, + CPUHP_ARM_BL_PREPARE, + CPUHP_TRACE_RB_PREPARE, + CPUHP_MM_ZS_PREPARE, + CPUHP_MM_ZSWP_MEM_PREPARE, + CPUHP_MM_ZSWP_POOL_PREPARE, + CPUHP_KVM_PPC_BOOK3S_PREPARE, + CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_MIPS_SOC_PREPARE, diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4d3f0d1aec73..bf1907d96097 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -52,7 +52,8 @@ extern struct srcu_struct debugfs_srcu; * Must only be called under the protection established by * debugfs_use_file_start(). */ -static inline const struct file_operations *debugfs_real_fops(struct file *filp) +static inline const struct file_operations * +debugfs_real_fops(const struct file *filp) __must_hold(&debugfs_srcu) { /* diff --git a/include/linux/device.h b/include/linux/device.h index bc41e87a969b..94926d3ad6c6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -698,6 +698,25 @@ static inline int devm_add_action_or_reset(struct device *dev, return ret; } +/** + * devm_alloc_percpu - Resource-managed alloc_percpu + * @dev: Device to allocate per-cpu memory for + * @type: Type to allocate per-cpu memory for + * + * Managed alloc_percpu. Per-cpu memory allocated with this function is + * automatically freed on driver detach. + * + * RETURNS: + * Pointer to allocated memory on success, NULL on failure. + */ +#define devm_alloc_percpu(dev, type) \ + ((typeof(type) __percpu *)__devm_alloc_percpu((dev), sizeof(type), \ + __alignof__(type))) + +void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, + size_t align); +void devm_free_percpu(struct device *dev, void __percpu *pdata); + struct device_dma_parameters { /* * a low level driver may set these to teach IOMMU code about @@ -733,7 +752,7 @@ struct device_dma_parameters { * minimizes board-specific #ifdefs in drivers. * @driver_data: Private pointer for driver specific info. * @power: For device power management. - * See Documentation/power/devices.txt for details. + * See Documentation/power/admin-guide/devices.rst for details. * @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index c934d3a96b5e..2896f93808ae 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -67,7 +67,7 @@ * genl_magic_func.h * generates an entry in the static genl_ops array, * and static register/unregister functions to - * genl_register_family_with_ops(). + * genl_register_family(). * * flags and handler: * GENL_op_init( .doit = x, .dumpit = y, .flags = something) diff --git a/include/linux/efi.h b/include/linux/efi.h index 2d089487d2da..a07a476178cd 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -443,6 +443,22 @@ typedef struct { #define EFI_PCI_IO_ATTRIBUTE_VGA_PALETTE_IO_16 0x20000 #define EFI_PCI_IO_ATTRIBUTE_VGA_IO_16 0x40000 +typedef struct { + u32 version; + u32 get; + u32 set; + u32 del; + u32 get_all; +} apple_properties_protocol_32_t; + +typedef struct { + u64 version; + u64 get; + u64 set; + u64 del; + u64 get_all; +} apple_properties_protocol_64_t; + /* * Types and defines for EFI ResetSystem */ @@ -589,8 +605,10 @@ void efi_native_runtime_setup(void); #define DEVICE_TREE_GUID EFI_GUID(0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0) #define EFI_PROPERTIES_TABLE_GUID EFI_GUID(0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5) #define EFI_RNG_PROTOCOL_GUID EFI_GUID(0x3152bca5, 0xeade, 0x433d, 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44) +#define EFI_RNG_ALGORITHM_RAW EFI_GUID(0xe43176d7, 0xb6e8, 0x4827, 0xb7, 0x84, 0x7f, 0xfd, 0xc4, 0xb6, 0x85, 0x61) #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20) #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) +#define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0) /* * This GUID is used to pass to the kernel proper the struct screen_info @@ -599,6 +617,7 @@ void efi_native_runtime_setup(void); */ #define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) +#define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) typedef struct { efi_guid_t guid; @@ -872,6 +891,7 @@ extern struct efi { unsigned long esrt; /* ESRT table */ unsigned long properties_table; /* properties table */ unsigned long mem_attr_table; /* memory attributes table */ + unsigned long rng_seed; /* UEFI firmware random seed */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; @@ -1145,6 +1165,26 @@ struct efi_generic_dev_path { u16 length; } __attribute ((packed)); +struct efi_dev_path { + u8 type; /* can be replaced with unnamed */ + u8 sub_type; /* struct efi_generic_dev_path; */ + u16 length; /* once we've moved to -std=c11 */ + union { + struct { + u32 hid; + u32 uid; + } acpi; + struct { + u8 fn; + u8 dev; + } pci; + }; +} __attribute ((packed)); + +#if IS_ENABLED(CONFIG_EFI_DEV_PATH_PARSER) +struct device *efi_get_device_by_path(struct efi_dev_path **node, size_t *len); +#endif + static inline void memrange_efi_to_native(u64 *addr, u64 *npages) { *npages = PFN_UP(*addr + (*npages<<EFI_PAGE_SHIFT)) - PFN_DOWN(*addr); @@ -1493,4 +1533,10 @@ efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table, struct efi_boot_memmap *map, void *priv, efi_exit_boot_map_processing priv_func); + +struct linux_efi_random_seed { + u32 size; + u8 bits[]; +}; + #endif /* _LINUX_EFI_H */ diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index 9a79f0106da1..32c22cfb238b 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -26,7 +26,6 @@ #ifdef __KERNEL__ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev); -int fddi_change_mtu(struct net_device *dev, int new_mtu); struct net_device *alloc_fddidev(int sizeof_priv); #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 1f09c521adfe..6a1658308612 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -14,6 +14,7 @@ #include <linux/workqueue.h> #include <linux/sched.h> #include <linux/capability.h> +#include <linux/cryptohash.h> #include <net/sch_generic.h> @@ -56,6 +57,9 @@ struct bpf_prog_aux; /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 +/* Maximum BPF program size in bytes. */ +#define MAX_BPF_SIZE (BPF_MAXINSNS * sizeof(struct bpf_insn)) + /* Helper macros for filter block array initializers. */ /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ @@ -402,14 +406,16 @@ struct bpf_prog { u16 jited:1, /* Is our filter JIT'ed? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1; /* Do we need dst entry? */ + dst_needed:1, /* Do we need dst entry? */ + xdp_adjust_head:1; /* Adjusting pkt head? */ kmemcheck_bitfield_end(meta); - u32 len; /* Number of filter blocks */ enum bpf_prog_type type; /* Type of BPF program */ + u32 len; /* Number of filter blocks */ + u32 digest[SHA_DIGEST_WORDS]; /* Program digest */ struct bpf_prog_aux *aux; /* Auxiliary fields */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ - unsigned int (*bpf_func)(const struct sk_buff *skb, - const struct bpf_insn *filter); + unsigned int (*bpf_func)(const void *ctx, + const struct bpf_insn *insn); /* Instructions for interpreter */ union { struct sock_filter insns[0]; @@ -435,10 +441,11 @@ struct bpf_skb_data_end { struct xdp_buff { void *data; void *data_end; + void *data_hard_start; }; /* compute the linear packet data range [data, data_end) which - * will be accessed by cls_bpf and act_bpf programs + * will be accessed by cls_bpf, act_bpf and lwt programs */ static inline void bpf_compute_data_end(struct sk_buff *skb) { @@ -498,16 +505,16 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return BPF_PROG_RUN(prog, skb); } -static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, - struct xdp_buff *xdp) +static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, + struct xdp_buff *xdp) { - u32 ret; - - rcu_read_lock(); - ret = BPF_PROG_RUN(prog, (void *)xdp); - rcu_read_unlock(); - - return ret; + /* Caller needs to hold rcu_read_lock() (!), otherwise program + * can be released while still running, or map elements could be + * freed early while still having concurrent users. XDP fastpath + * already takes rcu_read_lock() when fetching the program, so + * it's not necessary here anymore. + */ + return BPF_PROG_RUN(prog, xdp); } static inline unsigned int bpf_prog_size(unsigned int proglen) @@ -590,7 +597,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); -bool bpf_helper_changes_skb_data(void *func); +bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 667c31101b8b..377257d8f7e3 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -259,16 +259,7 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { * {{{2 */ #define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) -static struct genl_family ZZZ_genl_family __read_mostly = { - .id = GENL_ID_GENERATE, - .name = __stringify(GENL_MAGIC_FAMILY), - .version = GENL_MAGIC_VERSION, -#ifdef GENL_MAGIC_FAMILY_HDRSZ - .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), -#endif - .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, -}; - +static struct genl_family ZZZ_genl_family; /* * Magic: define multicast groups * Magic: define multicast group registration helper @@ -302,11 +293,23 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ #undef GENL_mc_group #define GENL_mc_group(group) +static struct genl_family ZZZ_genl_family __ro_after_init = { + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, + .ops = ZZZ_genl_ops, + .n_ops = ARRAY_SIZE(ZZZ_genl_ops), + .mcgrps = ZZZ_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps), + .module = THIS_MODULE, +}; + int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) { - return genl_register_family_with_ops_groups(&ZZZ_genl_family, \ - ZZZ_genl_ops, \ - ZZZ_genl_mcgrps); + return genl_register_family(&ZZZ_genl_family); } void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index e31bcd4c7859..97585d9679f3 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -93,8 +93,6 @@ static __inline__ void debug_frame(const struct sk_buff *skb) int hdlc_open(struct net_device *dev); /* Must be called by hardware driver when HDLC device is being closed */ void hdlc_close(struct net_device *dev); -/* May be used by hardware driver */ -int hdlc_change_mtu(struct net_device *dev, int new_mtu); /* Must be pointed to by hw driver's dev->netdev_ops->ndo_start_xmit */ netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev); diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 8ec23fb0b412..402f99e328d4 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -32,7 +32,6 @@ struct hippi_cb { }; __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); -int hippi_change_mtu(struct net_device *dev, int new_mtu); int hippi_mac_addr(struct net_device *dev, void *p); int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p); struct net_device *alloc_hippi_dev(int sizeof_priv); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e35e6de633b9..1f782aa1d8e6 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -189,6 +189,8 @@ static inline void deferred_split_huge_page(struct page *page) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) +static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long address, bool freeze, struct page *page) {} static inline void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct page *page) {} diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a80516fd65c8..fe849329511a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1576,6 +1576,9 @@ struct ieee80211_vht_operation { #define WLAN_AUTH_SHARED_KEY 1 #define WLAN_AUTH_FT 2 #define WLAN_AUTH_SAE 3 +#define WLAN_AUTH_FILS_SK 4 +#define WLAN_AUTH_FILS_SK_PFS 5 +#define WLAN_AUTH_FILS_PK 6 #define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 @@ -1960,6 +1963,26 @@ enum ieee80211_eid { WLAN_EID_VENDOR_SPECIFIC = 221, WLAN_EID_QOS_PARAMETER = 222, + WLAN_EID_CAG_NUMBER = 237, + WLAN_EID_AP_CSN = 239, + WLAN_EID_FILS_INDICATION = 240, + WLAN_EID_DILS = 241, + WLAN_EID_FRAGMENT = 242, + WLAN_EID_EXTENSION = 255 +}; + +/* Element ID Extensions for Element ID 255 */ +enum ieee80211_eid_ext { + WLAN_EID_EXT_ASSOC_DELAY_INFO = 1, + WLAN_EID_EXT_FILS_REQ_PARAMS = 2, + WLAN_EID_EXT_FILS_KEY_CONFIRM = 3, + WLAN_EID_EXT_FILS_SESSION = 4, + WLAN_EID_EXT_FILS_HLP_CONTAINER = 5, + WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN = 6, + WLAN_EID_EXT_KEY_DELIVERY = 7, + WLAN_EID_EXT_FILS_WRAPPED_DATA = 8, + WLAN_EID_EXT_FILS_PUBLIC_KEY = 12, + WLAN_EID_EXT_FILS_NONCE = 13, }; /* Action category code */ @@ -2073,6 +2096,9 @@ enum ieee80211_key_len { #define IEEE80211_GCMP_MIC_LEN 16 #define IEEE80211_GCMP_PN_LEN 6 +#define FILS_NONCE_LEN 16 +#define FILS_MAX_KEK_LEN 64 + /* Public action codes */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index f563907ed776..3355efc89781 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -44,4 +44,20 @@ static inline int arp_hdr_len(struct net_device *dev) return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; } } + +static inline bool dev_is_mac_header_xmit(const struct net_device *dev) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return false; + default: + return true; + } +} + #endif /* _LINUX_IF_ARP_H */ diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3319d97d789d..8d5fcd6284ce 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -399,22 +399,6 @@ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) skb->vlan_tci = 0; return skb; } -/* - * vlan_hwaccel_push_inside - pushes vlan tag to the payload - * @skb: skbuff to tag - * - * Checks is tag is present in @skb->vlan_tci and if it is, it pushes the - * VLAN tag from @skb->vlan_tci inside to the payload. - * - * Following the skb_unshare() example, in case of error, the calling function - * doesn't have to worry about freeing the original skb. - */ -static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb) -{ - if (skb_vlan_tag_present(skb)) - skb = __vlan_hwaccel_push_inside(skb); - return skb; -} /** * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2d9b650047a5..d49e26c6cdc7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -429,6 +429,7 @@ struct intel_iommu { struct page_req_dsc *prq; unsigned char prq_name[16]; /* Name for PRQ interrupt */ struct idr pasid_idr; + u32 pasid_max; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 72f0721f75e7..53144e78a369 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -232,6 +232,18 @@ struct irq_affinity_notify { void (*release)(struct kref *ref); }; +/** + * struct irq_affinity - Description for automatic irq affinity assignements + * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of + * the MSI(-X) vector space + * @post_vectors: Don't apply affinity to @post_vectors at end of + * the MSI(-X) vector space + */ +struct irq_affinity { + int pre_vectors; + int post_vectors; +}; + #if defined(CONFIG_SMP) extern cpumask_var_t irq_default_affinity; @@ -278,8 +290,8 @@ extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); -struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec); -int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec); +struct cpumask *irq_create_affinity_masks(int nvec, const struct irq_affinity *affd); +int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd); #else /* CONFIG_SMP */ @@ -313,13 +325,13 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) } static inline struct cpumask * -irq_create_affinity_masks(const struct cpumask *affinity, int nvec) +irq_create_affinity_masks(int nvec, const struct irq_affinity *affd) { return NULL; } static inline int -irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec) +irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd) { return maxvec; } diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a0649973ee5b..671d014e6429 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -64,6 +64,11 @@ struct ipv6_devconf { } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; + __s32 seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + __s32 seg6_require_hmac; +#endif + __u32 enhanced_dad; struct ctl_table_header *sysctl_header; }; @@ -229,8 +234,9 @@ struct ipv6_pinfo { rxflow:1, rxtclass:1, rxpmtu:1, - rxorigdstaddr:1; - /* 2 bits hole */ + rxorigdstaddr:1, + recvfragsize:1; + /* 1 bits hole */ } bits; __u16 all; } rxopt; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index b7e34313cdfe..5118d3a0c9ca 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -239,7 +239,7 @@ #define GITS_TYPER_PTA (1UL << 19) #define GITS_TYPER_HWCOLLCNT_SHIFT 24 -#define GITS_CBASER_VALID (1UL << 63) +#define GITS_CBASER_VALID (1ULL << 63) #define GITS_CBASER_SHAREABILITY_SHIFT (10) #define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) #define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53) @@ -265,7 +265,7 @@ #define GITS_BASER_NR_REGS 8 -#define GITS_BASER_VALID (1UL << 63) +#define GITS_BASER_VALID (1ULL << 63) #define GITS_BASER_INDIRECT (1ULL << 62) #define GITS_BASER_INNER_CACHEABILITY_SHIFT (59) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 44fda64ad434..00f776816aa3 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -78,8 +78,8 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) return kstat_cpu(cpu).irqs_sum; } -extern void account_user_time(struct task_struct *, cputime_t, cputime_t); -extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); +extern void account_user_time(struct task_struct *, cputime_t); +extern void account_system_time(struct task_struct *, int, cputime_t); extern void account_steal_time(cputime_t); extern void account_idle_time(cputime_t); diff --git a/include/linux/kthread.h b/include/linux/kthread.h index a6e82a69c363..4fec8b775895 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -48,6 +48,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), __k; \ }) +void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int kthread_stop(struct task_struct *k); @@ -174,7 +175,7 @@ __printf(2, 3) struct kthread_worker * kthread_create_worker(unsigned int flags, const char namefmt[], ...); -struct kthread_worker * +__printf(3, 4) struct kthread_worker * kthread_create_worker_on_cpu(int cpu, unsigned int flags, const char namefmt[], ...); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 01c0b9cc3915..81ba3ba641ba 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -224,7 +224,6 @@ struct kvm_vcpu { int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; - unsigned char fpu_counter; struct swait_queue_head wq; struct pid *pid; int sigset_active; @@ -645,6 +644,8 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f4947fda11e7..8458c5351e56 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -143,7 +143,7 @@ u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, const struct nd_cmd_desc *desc, int idx, void *buf); u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, const struct nd_cmd_desc *desc, int idx, const u32 *in_field, - const u32 *out_field); + const u32 *out_field, unsigned long remainder); int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count); struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc); diff --git a/include/linux/list.h b/include/linux/list.h index 5809e9a2de5b..d1039ecaf94f 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -28,27 +28,42 @@ static inline void INIT_LIST_HEAD(struct list_head *list) list->prev = list; } +#ifdef CONFIG_DEBUG_LIST +extern bool __list_add_valid(struct list_head *new, + struct list_head *prev, + struct list_head *next); +extern bool __list_del_entry_valid(struct list_head *entry); +#else +static inline bool __list_add_valid(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + return true; +} +static inline bool __list_del_entry_valid(struct list_head *entry) +{ + return true; +} +#endif + /* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know * the prev/next entries already! */ -#ifndef CONFIG_DEBUG_LIST static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { + if (!__list_add_valid(new, prev, next)) + return; + next->prev = new; new->next = next; new->prev = prev; WRITE_ONCE(prev->next, new); } -#else -extern void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next); -#endif /** * list_add - add a new entry @@ -96,22 +111,20 @@ static inline void __list_del(struct list_head * prev, struct list_head * next) * Note: list_empty() on entry does not return true after this, the entry is * in an undefined state. */ -#ifndef CONFIG_DEBUG_LIST static inline void __list_del_entry(struct list_head *entry) { + if (!__list_del_entry_valid(entry)) + return; + __list_del(entry->prev, entry->next); } static inline void list_del(struct list_head *entry) { - __list_del(entry->prev, entry->next); + __list_del_entry(entry); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; } -#else -extern void __list_del_entry(struct list_head *entry); -extern void list_del(struct list_head *entry); -#endif /** * list_replace - replace old entry by new one diff --git a/include/linux/mbus.h b/include/linux/mbus.h index 2931aa43dab1..0d3f14fd2621 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -82,6 +82,7 @@ static inline int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, } #endif +#ifdef CONFIG_MVEBU_MBUS int mvebu_mbus_save_cpu_target(u32 __iomem *store_addr); void mvebu_mbus_get_pcie_mem_aperture(struct resource *res); void mvebu_mbus_get_pcie_io_aperture(struct resource *res); @@ -97,5 +98,12 @@ int mvebu_mbus_init(const char *soc, phys_addr_t mbus_phys_base, size_t mbus_size, phys_addr_t sdram_phys_base, size_t sdram_size); int mvebu_mbus_dt_init(bool is_coherent); +#else +static inline int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, + u8 *attr) +{ + return -EINVAL; +} +#endif /* CONFIG_MVEBU_MBUS */ #endif /* __LINUX_MBUS_H */ diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index a585b4b5fa0e..0661af17a758 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -16,6 +16,7 @@ #include <asm/mc146818rtc.h> /* register access macros */ #include <linux/bcd.h> #include <linux/delay.h> +#include <linux/pm-trace.h> #ifdef __KERNEL__ #include <linux/spinlock.h> /* spinlock_t */ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5e5b2969d931..5f4d8281832b 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -7,6 +7,7 @@ #include <linux/mmzone.h> +#include <linux/dax.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/spinlock.h> @@ -177,6 +178,13 @@ static inline bool vma_migratable(struct vm_area_struct *vma) if (vma->vm_flags & (VM_IO | VM_PFNMAP)) return false; + /* + * DAX device mappings require predictable access latency, so avoid + * incurring periodic faults. + */ + if (vma_is_dax(vma)) + return false; + #ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION if (vma->vm_flags & VM_HUGETLB) return false; diff --git a/include/linux/mii.h b/include/linux/mii.h index 47492c9631b3..1629a0c32679 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -31,7 +31,11 @@ struct mii_if_info { extern int mii_link_ok (struct mii_if_info *mii); extern int mii_nway_restart (struct mii_if_info *mii); extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern int mii_ethtool_get_link_ksettings( + struct mii_if_info *mii, struct ethtool_link_ksettings *cmd); extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern int mii_ethtool_set_link_ksettings( + struct mii_if_info *mii, const struct ethtool_link_ksettings *cmd); extern int mii_check_gmii_support(struct mii_if_info *mii); extern void mii_check_link (struct mii_if_info *mii); extern unsigned int mii_check_media (struct mii_if_info *mii, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3be7abd6e722..c9f379689dd0 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -476,7 +476,6 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, - MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 58276144ba81..9f489365b3d3 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -277,6 +277,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_INTERNAL_ERROR = 0x08, MLX5_EVENT_TYPE_PORT_CHANGE = 0x09, MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, + MLX5_EVENT_TYPE_PORT_MODULE_EVENT = 0x16, MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, @@ -552,6 +553,15 @@ struct mlx5_eqe_vport_change { __be32 rsvd1[6]; } __packed; +struct mlx5_eqe_port_module { + u8 reserved_at_0[1]; + u8 module; + u8 reserved_at_2[1]; + u8 module_status; + u8 reserved_at_4[2]; + u8 error_type; +} __packed; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -565,6 +575,7 @@ union ev_data { struct mlx5_eqe_page_req req_pages; struct mlx5_eqe_page_fault page_fault; struct mlx5_eqe_vport_change vport_change; + struct mlx5_eqe_port_module port_module; } __packed; struct mlx5_eqe { @@ -1060,6 +1071,11 @@ enum { MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; +enum { + MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, + MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2, +}; + static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ecc451d89ccd..0ae55361e674 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -104,6 +104,8 @@ enum { enum { MLX5_REG_QETCR = 0x4005, MLX5_REG_QTCT = 0x400a, + MLX5_REG_DCBX_PARAM = 0x4020, + MLX5_REG_DCBX_APP = 0x4021, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, @@ -121,6 +123,12 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, + MLX5_REG_MPCNT = 0x9051, +}; + +enum mlx5_dcbx_oper_mode { + MLX5E_DCBX_PARAM_VER_OPER_HOST = 0x0, + MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3, }; enum { @@ -208,7 +216,7 @@ struct mlx5_cmd_first { struct mlx5_cmd_msg { struct list_head list; - struct cache_ent *cache; + struct cmd_msg_cache *parent; u32 len; struct mlx5_cmd_first first; struct mlx5_cmd_mailbox *next; @@ -228,17 +236,17 @@ struct mlx5_cmd_debug { u16 outlen; }; -struct cache_ent { +struct cmd_msg_cache { /* protect block chain allocations */ spinlock_t lock; struct list_head head; + unsigned int max_inbox_size; + unsigned int num_ent; }; -struct cmd_msg_cache { - struct cache_ent large; - struct cache_ent med; - +enum { + MLX5_NUM_COMMAND_CACHES = 5, }; struct mlx5_cmd_stats { @@ -281,7 +289,7 @@ struct mlx5_cmd { struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; struct pci_pool *pool; struct mlx5_cmd_debug dbg; - struct cmd_msg_cache cache; + struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; @@ -310,6 +318,13 @@ struct mlx5_buf { u8 page_shift; }; +struct mlx5_frag_buf { + struct mlx5_buf_list *frags; + int npages; + int size; + u8 page_shift; +}; + struct mlx5_eq_tasklet { struct list_head list; struct list_head process_list; @@ -498,6 +513,31 @@ struct mlx5_rl_table { struct mlx5_rl_entry *rl_entry; }; +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_NUM = 0x3, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE, + MLX5_MODULE_EVENT_ERROR_UNKNOWN, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_port_module_event_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -559,6 +599,8 @@ struct mlx5_priv { unsigned long pci_dev_data; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; + + struct mlx5_port_module_event_stats pme_stats; }; enum mlx5_device_state { @@ -787,6 +829,9 @@ int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); +int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_frag_buf *buf, int node); +void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, @@ -831,6 +876,7 @@ void mlx5_unregister_debugfs(void); int mlx5_eq_init(struct mlx5_core_dev *dev); void mlx5_eq_cleanup(struct mlx5_core_dev *dev); void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); +void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 93ebc5e21334..949b24b6c479 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -42,6 +42,10 @@ enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, }; +enum { + MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0), +}; + #define LEFTOVERS_RULE_NUM 2 static inline void build_leftovers_ft_param(int *priority, int *n_ent, @@ -69,8 +73,8 @@ enum mlx5_flow_namespace_type { struct mlx5_flow_table; struct mlx5_flow_group; -struct mlx5_flow_rule; struct mlx5_flow_namespace; +struct mlx5_flow_handle; struct mlx5_flow_spec { u8 match_criteria_enable; @@ -97,13 +101,15 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, int max_num_groups, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, @@ -124,21 +130,28 @@ struct mlx5_flow_group * mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); +struct mlx5_flow_act { + u32 action; + u32 flow_tag; + u32 encap_id; +}; + /* Single destination per rule. * Group ID is implied by the match criteria. */ -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest); -void mlx5_del_flow_rule(struct mlx5_flow_rule *fr); - -int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, - struct mlx5_flow_destination *dest); - -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule); +struct mlx5_flow_handle * +mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num); +void mlx5_del_flow_rules(struct mlx5_flow_handle *fr); + +int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, + struct mlx5_flow_destination *new_dest, + struct mlx5_flow_destination *old_dest); + +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6045d4d58065..a5f0fbedf1e7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -83,6 +83,7 @@ enum { MLX5_CMD_OP_SET_HCA_CAP = 0x109, MLX5_CMD_OP_QUERY_ISSI = 0x10a, MLX5_CMD_OP_SET_ISSI = 0x10b, + MLX5_CMD_OP_SET_DRIVER_VERSION = 0x10d, MLX5_CMD_OP_CREATE_MKEY = 0x200, MLX5_CMD_OP_QUERY_MKEY = 0x201, MLX5_CMD_OP_DESTROY_MKEY = 0x202, @@ -145,6 +146,12 @@ enum { MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, + MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, + MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT = 0x784, + MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT = 0x785, + MLX5_CMD_OP_CREATE_QOS_PARA_VPORT = 0x786, + MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT = 0x787, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, MLX5_CMD_OP_ALLOC_UAR = 0x802, @@ -537,13 +544,27 @@ struct mlx5_ifc_e_switch_cap_bits { struct mlx5_ifc_qos_cap_bits { u8 packet_pacing[0x1]; - u8 reserved_0[0x1f]; - u8 reserved_1[0x20]; + u8 esw_scheduling[0x1]; + u8 reserved_at_2[0x1e]; + + u8 reserved_at_20[0x20]; + u8 packet_pacing_max_rate[0x20]; + u8 packet_pacing_min_rate[0x20]; - u8 reserved_2[0x10]; + + u8 reserved_at_80[0x10]; u8 packet_pacing_rate_table_size[0x10]; - u8 reserved_3[0x760]; + + u8 esw_element_type[0x10]; + u8 esw_tsar_type[0x10]; + + u8 reserved_at_c0[0x10]; + u8 max_qos_para_vport[0x10]; + + u8 max_tsar_bw_share[0x20]; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_per_protocol_networking_offload_caps_bits { @@ -804,7 +825,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 early_vf_enable[0x1]; u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; - u8 reserved_at_1af[0x2]; + u8 port_module_event[0x1]; + u8 reserved_at_1b0[0x1]; u8 ports_check[0x1]; u8 reserved_at_1b2[0x1]; u8 disable_link_up[0x1]; @@ -888,7 +910,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_pg_sz[0x8]; u8 bf[0x1]; - u8 reserved_at_261[0x1]; + u8 driver_version[0x1]; u8 pad_tx_eth_packet[0x1]; u8 reserved_at_263[0x8]; u8 log_bf_reg_size[0x5]; @@ -1735,6 +1757,80 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { u8 reserved_at_4c0[0x300]; }; +struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { + u8 life_time_counter_high[0x20]; + + u8 life_time_counter_low[0x20]; + + u8 rx_errors[0x20]; + + u8 tx_errors[0x20]; + + u8 l0_to_recovery_eieos[0x20]; + + u8 l0_to_recovery_ts[0x20]; + + u8 l0_to_recovery_framing[0x20]; + + u8 l0_to_recovery_retrain[0x20]; + + u8 crc_error_dllp[0x20]; + + u8 crc_error_tlp[0x20]; + + u8 reserved_at_140[0x680]; +}; + +struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits { + u8 life_time_counter_high[0x20]; + + u8 life_time_counter_low[0x20]; + + u8 time_to_boot_image_start[0x20]; + + u8 time_to_link_image[0x20]; + + u8 calibration_time[0x20]; + + u8 time_to_first_perst[0x20]; + + u8 time_to_detect_state[0x20]; + + u8 time_to_l0[0x20]; + + u8 time_to_crs_en[0x20]; + + u8 time_to_plastic_image_start[0x20]; + + u8 time_to_iron_image_start[0x20]; + + u8 perst_handler[0x20]; + + u8 times_in_l1[0x20]; + + u8 times_in_l23[0x20]; + + u8 dl_down[0x20]; + + u8 config_cycle1usec[0x20]; + + u8 config_cycle2to7usec[0x20]; + + u8 config_cycle_8to15usec[0x20]; + + u8 config_cycle_16_to_63usec[0x20]; + + u8 config_cycle_64usec[0x20]; + + u8 correctable_err_msg_sent[0x20]; + + u8 non_fatal_err_msg_sent[0x20]; + + u8 fatal_err_msg_sent[0x20]; + + u8 reserved_at_2e0[0x4e0]; +}; + struct mlx5_ifc_cmd_inter_comp_event_bits { u8 command_completion_vector[0x20]; @@ -2333,6 +2429,30 @@ struct mlx5_ifc_sqc_bits { struct mlx5_ifc_wq_bits wq; }; +enum { + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR = 0x0, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, + SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, +}; + +struct mlx5_ifc_scheduling_context_bits { + u8 element_type[0x8]; + u8 reserved_at_8[0x18]; + + u8 element_attributes[0x20]; + + u8 parent_element_id[0x20]; + + u8 reserved_at_60[0x40]; + + u8 bw_share[0x20]; + + u8 max_average_bw[0x20]; + + u8 reserved_at_e0[0x120]; +}; + struct mlx5_ifc_rqtc_bits { u8 reserved_at_0[0xa0]; @@ -2844,7 +2964,7 @@ struct mlx5_ifc_xrqc_bits { struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; - u8 reserved_at_180[0x200]; + u8 reserved_at_180[0x880]; struct mlx5_ifc_wq_bits wq; }; @@ -2875,6 +2995,12 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { u8 reserved_at_0[0x7c0]; }; +union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits { + struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout; + struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout; + u8 reserved_at_0[0x7c0]; +}; + union mlx5_ifc_event_auto_bits { struct mlx5_ifc_comp_event_bits comp_event; struct mlx5_ifc_dct_events_bits dct_events; @@ -2920,6 +3046,29 @@ struct mlx5_ifc_register_loopback_control_bits { u8 reserved_at_20[0x60]; }; +struct mlx5_ifc_vport_tc_element_bits { + u8 traffic_class[0x4]; + u8 reserved_at_4[0xc]; + u8 vport_number[0x10]; +}; + +struct mlx5_ifc_vport_element_bits { + u8 reserved_at_0[0x10]; + u8 vport_number[0x10]; +}; + +enum { + TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, + TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, + TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, +}; + +struct mlx5_ifc_tsar_element_bits { + u8 reserved_at_0[0x8]; + u8 tsar_type[0x8]; + u8 reserved_at_10[0x10]; +}; + struct mlx5_ifc_teardown_hca_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -3540,6 +3689,39 @@ struct mlx5_ifc_query_special_contexts_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_query_scheduling_element_out_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0xc0]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + +enum { + SCHEDULING_HIERARCHY_E_SWITCH = 0x2, +}; + +struct mlx5_ifc_query_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_query_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -3904,6 +4086,25 @@ struct mlx5_ifc_query_issi_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_set_driver_version_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_set_driver_version_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 reserved_2[0x40]; + u8 driver_version[64][0x8]; +}; + struct mlx5_ifc_query_hca_vport_pkey_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4725,6 +4926,43 @@ struct mlx5_ifc_modify_sq_in_bits { struct mlx5_ifc_sqc_bits ctx; }; +struct mlx5_ifc_modify_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1c0]; +}; + +enum { + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE = 0x1, + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW = 0x2, +}; + +struct mlx5_ifc_modify_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x20]; + + u8 modify_bitmask[0x20]; + + u8 reserved_at_c0[0x40]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + struct mlx5_ifc_modify_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5390,6 +5628,30 @@ struct mlx5_ifc_destroy_sq_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_destroy_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1c0]; +}; + +struct mlx5_ifc_destroy_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_destroy_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6017,6 +6279,36 @@ struct mlx5_ifc_create_sq_in_bits { struct mlx5_ifc_sqc_bits ctx; }; +struct mlx5_ifc_create_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_a0[0x160]; +}; + +struct mlx5_ifc_create_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 reserved_at_60[0xa0]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + struct mlx5_ifc_create_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7028,6 +7320,18 @@ struct mlx5_ifc_ppcnt_reg_bits { union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; +struct mlx5_ifc_mpcnt_reg_bits { + u8 reserved_at_0[0x8]; + u8 pcie_index[0x8]; + u8 reserved_at_10[0xa]; + u8 grp[0x6]; + + u8 clr[0x1]; + u8 reserved_at_21[0x1f]; + + union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set; +}; + struct mlx5_ifc_ppad_reg_bits { u8 reserved_at_0[0x3]; u8 single_mac[0x1]; @@ -7633,6 +7937,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pmtu_reg_bits pmtu_reg; struct mlx5_ifc_ppad_reg_bits ppad_reg; struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; + struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg; struct mlx5_ifc_pplm_reg_bits pplm_reg; struct mlx5_ifc_pplr_reg_bits pplr_reg; struct mlx5_ifc_ppsc_reg_bits ppsc_reg; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index b3065acd20b4..e527732fb31b 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -94,6 +94,9 @@ enum mlx5e_link_mode { #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); @@ -138,8 +141,12 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, int mlx5_max_tc(struct mlx5_core_dev *mdev); int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); +int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, + u8 prio, u8 *tc); int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); +int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, + u8 tc, u8 *bw_pct); int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, u8 *max_bw_value, u8 *max_bw_unit); @@ -155,4 +162,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, u16 offset, u16 size, u8 *data); +int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); +int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); #endif /* __MLX5_PORT_H__ */ diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 33c97dc900f8..1cde0fd53f90 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -55,7 +55,7 @@ struct mlx5_srq_attr { u32 lwm; u32 user_index; u64 db_record; - u64 *pas; + __be64 *pas; }; struct mlx5_core_dev; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 451b0bde9083..ec35157ea725 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,6 +36,12 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/device.h> +enum { + MLX5_CAP_INLINE_MODE_L2, + MLX5_CAP_INLINE_MODE_VPORT_CONTEXT, + MLX5_CAP_INLINE_MODE_NOT_REQUIRED, +}; + u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); @@ -43,8 +49,8 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); -void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, - u8 *min_inline); +int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 *min_inline); int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, u16 vport, u8 min_inline); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h deleted file mode 100644 index 4ac8b1977b73..000000000000 --- a/include/linux/mutex-debug.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __LINUX_MUTEX_DEBUG_H -#define __LINUX_MUTEX_DEBUG_H - -#include <linux/linkage.h> -#include <linux/lockdep.h> -#include <linux/debug_locks.h> - -/* - * Mutexes - debugging helpers: - */ - -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ - , .magic = &lockname - -#define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ -} while (0) - -extern void mutex_destroy(struct mutex *lock); - -#endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2cb7531e7d7a..b97870f2debd 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -18,6 +18,7 @@ #include <linux/atomic.h> #include <asm/processor.h> #include <linux/osq_lock.h> +#include <linux/debug_locks.h> /* * Simple, straightforward mutexes with strict semantics: @@ -48,16 +49,12 @@ * locks and tasks (and only those tasks) */ struct mutex { - /* 1: unlocked, 0: locked, negative: locked, possible waiters */ - atomic_t count; + atomic_long_t owner; spinlock_t wait_lock; - struct list_head wait_list; -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) - struct task_struct *owner; -#endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif + struct list_head wait_list; #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif @@ -66,6 +63,11 @@ struct mutex { #endif }; +static inline struct task_struct *__mutex_owner(struct mutex *lock) +{ + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03); +} + /* * This is the control structure for tasks blocked on mutex, * which resides on the blocked task's kernel stack: @@ -79,9 +81,20 @@ struct mutex_waiter { }; #ifdef CONFIG_DEBUG_MUTEXES -# include <linux/mutex-debug.h> + +#define __DEBUG_MUTEX_INITIALIZER(lockname) \ + , .magic = &lockname + +extern void mutex_destroy(struct mutex *lock); + #else + # define __DEBUG_MUTEX_INITIALIZER(lockname) + +static inline void mutex_destroy(struct mutex *lock) {} + +#endif + /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -90,14 +103,12 @@ struct mutex_waiter { * * It is not allowed to initialize an already locked mutex. */ -# define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ +#define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key); \ } while (0) -static inline void mutex_destroy(struct mutex *lock) {} -#endif #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -107,7 +118,7 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif #define __MUTEX_INITIALIZER(lockname) \ - { .count = ATOMIC_INIT(1) \ + { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ __DEBUG_MUTEX_INITIALIZER(lockname) \ @@ -127,7 +138,10 @@ extern void __mutex_init(struct mutex *lock, const char *name, */ static inline int mutex_is_locked(struct mutex *lock) { - return atomic_read(&lock->count) != 1; + /* + * XXX think about spin_is_locked + */ + return __mutex_owner(lock) != NULL; } /* @@ -175,4 +189,35 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +/* + * These values are chosen such that FAIL and SUCCESS match the + * values of the regular mutex_trylock(). + */ +enum mutex_trylock_recursive_enum { + MUTEX_TRYLOCK_FAILED = 0, + MUTEX_TRYLOCK_SUCCESS = 1, + MUTEX_TRYLOCK_RECURSIVE, +}; + +/** + * mutex_trylock_recursive - trylock variant that allows recursive locking + * @lock: mutex to be locked + * + * This function should not be used, _ever_. It is purely for hysterical GEM + * raisins, and once those are gone this will be removed. + * + * Returns: + * MUTEX_TRYLOCK_FAILED - trylock failed, + * MUTEX_TRYLOCK_SUCCESS - lock acquired, + * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. + */ +static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum +mutex_trylock_recursive(struct mutex *lock) +{ + if (unlikely(__mutex_owner(lock) == current)) + return MUTEX_TRYLOCK_RECURSIVE; + + return mutex_trylock(lock); +} + #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf04a46f6d5b..994f7423a74b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -192,6 +192,7 @@ struct net_device_stats { #ifdef CONFIG_RPS #include <linux/static_key.h> extern struct static_key rps_needed; +extern struct static_key rfs_needed; #endif struct neighbour; @@ -316,7 +317,6 @@ struct napi_struct { unsigned int gro_count; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL - spinlock_t poll_lock; int poll_owner; #endif struct net_device *dev; @@ -334,6 +334,16 @@ enum { NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ + NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ +}; + +enum { + NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED), + NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC), + NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL), }; enum gro_result { @@ -453,32 +463,22 @@ static inline bool napi_reschedule(struct napi_struct *napi) return false; } -void __napi_complete(struct napi_struct *n); -void napi_complete_done(struct napi_struct *n, int work_done); +bool __napi_complete(struct napi_struct *n); +bool napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete * @n: NAPI context * * Mark NAPI processing as complete. * Consider using napi_complete_done() instead. + * Return false if device should avoid rearming interrupts. */ -static inline void napi_complete(struct napi_struct *n) +static inline bool napi_complete(struct napi_struct *n) { return napi_complete_done(n, 0); } /** - * napi_hash_add - add a NAPI to global hashtable - * @napi: NAPI context - * - * Generate a new napi_id and store a @napi under it in napi_hash. - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). - * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future Linux version. - */ -void napi_hash_add(struct napi_struct *napi); - -/** * napi_hash_del - remove a NAPI from global table * @napi: NAPI context * @@ -732,8 +732,8 @@ struct xps_dev_maps { struct rcu_head rcu; struct xps_map __rcu *cpu_map[0]; }; -#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ - (nr_cpu_ids * sizeof(struct xps_map *))) +#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ + (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *))) #endif /* CONFIG_XPS */ #define TC_MAX_QUEUE 16 @@ -803,6 +803,7 @@ struct tc_to_netdev { struct tc_cls_matchall_offload *cls_mall; struct tc_cls_bpf_offload *cls_bpf; }; + bool egress_dev; }; /* These structures hold the attributes of xdp state that are being passed @@ -926,7 +927,7 @@ struct netdev_xdp { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * - * bool (*ndo_has_offload_stats)(int attr_id) + * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id) * Return true if this device supports offload stats of this attr_id. * * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, @@ -1166,7 +1167,7 @@ struct net_device_ops { struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); - bool (*ndo_has_offload_stats)(int attr_id); + bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id); int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, void *attr_data); @@ -1456,7 +1457,6 @@ enum netdev_priv_flags { * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding - * @all_adj_list: All linked devices, *including* neighbours * @features: Currently active device features * @hw_features: User-changeable features * @@ -1506,6 +1506,8 @@ enum netdev_priv_flags { * @if_port: Selectable AUI, TP, ... * @dma: DMA channel * @mtu: Interface MTU value + * @min_mtu: Interface Minimum MTU value + * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. * @@ -1619,7 +1621,7 @@ enum netdev_priv_flags { * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device * @tc_to_txq: XXX: need comments on this one - * @prio_tc_map XXX: need comments on this one + * @prio_tc_map: XXX: need comments on this one * * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * @@ -1673,11 +1675,6 @@ struct net_device { struct list_head lower; } adj_list; - struct { - struct list_head upper; - struct list_head lower; - } all_adj_list; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; @@ -1726,6 +1723,8 @@ struct net_device { unsigned char dma; unsigned int mtu; + unsigned int min_mtu; + unsigned int max_mtu; unsigned short type; unsigned short hard_header_len; @@ -1922,34 +1921,10 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) return 0; } -static inline -void netdev_reset_tc(struct net_device *dev) -{ - dev->num_tc = 0; - memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); - memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); -} - -static inline -int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) -{ - if (tc >= dev->num_tc) - return -EINVAL; - - dev->tc_to_txq[tc].count = count; - dev->tc_to_txq[tc].offset = offset; - return 0; -} - -static inline -int netdev_set_num_tc(struct net_device *dev, u8 num_tc) -{ - if (num_tc > TC_MAX_QUEUE) - return -EINVAL; - - dev->num_tc = num_tc; - return 0; -} +int netdev_txq_to_tc(struct net_device *dev, unsigned int txq); +void netdev_reset_tc(struct net_device *dev); +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset); +int netdev_set_num_tc(struct net_device *dev, u8 num_tc); static inline int netdev_get_num_tc(struct net_device *dev) @@ -2686,71 +2661,6 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } -struct skb_csum_offl_spec { - __u16 ipv4_okay:1, - ipv6_okay:1, - encap_okay:1, - ip_options_okay:1, - ext_hdrs_okay:1, - tcp_okay:1, - udp_okay:1, - sctp_okay:1, - vlan_okay:1, - no_encapped_ipv6:1, - no_not_encapped:1; -}; - -bool __skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help); - -static inline bool skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return false; - - return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help); -} - -static inline bool skb_csum_offload_chk_help(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec) -{ - bool csum_encapped; - - return skb_csum_offload_chk(skb, spec, &csum_encapped, true); -} - -static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .ipv6_okay = 1, - .vlan_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - -static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - .vlan_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -3345,7 +3255,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_xdp_fd(struct net_device *dev, int fd); +int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); @@ -3554,6 +3464,17 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) txq->xmit_lock_owner = cpu; } +static inline bool __netif_tx_acquire(struct netdev_queue *txq) +{ + __acquire(&txq->_xmit_lock); + return true; +} + +static inline void __netif_tx_release(struct netdev_queue *txq) +{ + __release(&txq->_xmit_lock); +} + static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); @@ -3655,17 +3576,21 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) #define HARD_TX_LOCK(dev, txq, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_lock(txq, cpu); \ + } else { \ + __netif_tx_acquire(txq); \ } \ } #define HARD_TX_TRYLOCK(dev, txq) \ (((dev->features & NETIF_F_LLTX) == 0) ? \ __netif_tx_trylock(txq) : \ - true ) + __netif_tx_acquire(txq)) #define HARD_TX_UNLOCK(dev, txq) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ __netif_tx_unlock(txq); \ + } else { \ + __netif_tx_release(txq); \ } \ } @@ -3884,12 +3809,13 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, updev; \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) -/* iterate through upper list, must be called under RCU read lock */ -#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ - for (iter = &(dev)->all_adj_list.upper, \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \ - updev; \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *upper_dev, + void *data), + void *data); + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev); void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); @@ -3922,17 +3848,14 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, struct list_head **iter); -#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - ldev = netdev_all_lower_get_next(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next(dev, &(iter))) - -#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ - for (iter = &(dev)->all_adj_list.lower, \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); @@ -4009,19 +3932,6 @@ static inline bool can_checksum_protocol(netdev_features_t features, } } -/* Map an ethertype into IP protocol if possible */ -static inline int eproto_to_ipproto(int eproto) -{ - switch (eproto) { - case htons(ETH_P_IP): - return IPPROTO_IP; - case htons(ETH_P_IPV6): - return IPPROTO_IPV6; - default: - return -1; - } -} - #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev); #else diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index abc7fdcb9eb1..a4b97be30b28 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -49,13 +49,11 @@ struct sock; struct nf_hook_state { unsigned int hook; - int thresh; u_int8_t pf; struct net_device *in; struct net_device *out; struct sock *sk; struct net *net; - struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; @@ -77,14 +75,42 @@ struct nf_hook_ops { struct nf_hook_entry { struct nf_hook_entry __rcu *next; - struct nf_hook_ops ops; + nf_hookfn *hook; + void *priv; const struct nf_hook_ops *orig_ops; }; +static inline void +nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) +{ + entry->next = NULL; + entry->hook = ops->hook; + entry->priv = ops->priv; + entry->orig_ops = ops; +} + +static inline int +nf_hook_entry_priority(const struct nf_hook_entry *entry) +{ + return entry->orig_ops->priority; +} + +static inline int +nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, + struct nf_hook_state *state) +{ + return entry->hook(entry->priv, skb, state); +} + +static inline const struct nf_hook_ops * +nf_hook_entry_ops(const struct nf_hook_entry *entry) +{ + return entry->orig_ops; +} + static inline void nf_hook_state_init(struct nf_hook_state *p, - struct nf_hook_entry *hook_entry, unsigned int hook, - int thresh, u_int8_t pf, + u_int8_t pf, struct net_device *indev, struct net_device *outdev, struct sock *sk, @@ -92,13 +118,11 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; - p->thresh = thresh; p->pf = pf; p->in = indev; p->out = outdev; p->sk = sk; p->net = net; - RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } @@ -152,23 +176,20 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif -int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry *entry); /** - * nf_hook_thresh - call a netfilter hook + * nf_hook - call a netfilter hook * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ -static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, - struct net *net, - struct sock *sk, - struct sk_buff *skb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct net *, struct sock *, struct sk_buff *), - int thresh) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entry *hook_head; int ret = 1; @@ -185,24 +206,16 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (hook_head) { struct nf_hook_state state; - nf_hook_state_init(&state, hook_head, hook, thresh, - pf, indev, outdev, sk, net, okfn); + nf_hook_state_init(&state, hook, pf, indev, outdev, + sk, net, okfn); - ret = nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, hook_head); } rcu_read_unlock(); return ret; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, - struct sock *sk, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct net *, struct sock *, struct sk_buff *)) -{ - return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); -} - /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). @@ -221,19 +234,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, */ static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, - struct sk_buff *skb, struct net_device *in, - struct net_device *out, - int (*okfn)(struct net *, struct sock *, struct sk_buff *), - int thresh) -{ - int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); - if (ret == 1) - ret = okfn(net, sk, skb); - return ret; -} - -static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), @@ -242,7 +242,7 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1)) ret = okfn(net, sk, skb); return ret; } @@ -252,7 +252,10 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN); + int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); + if (ret == 1) + ret = okfn(net, sk, skb); + return ret; } /* Call setsockopt() */ diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 83b9a2e0d8d4..8e42253e5d4d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -79,10 +79,12 @@ enum ip_set_ext_id { IPSET_EXT_ID_MAX, }; +struct ip_set; + /* Extension type */ struct ip_set_ext_type { /* Destroy extension private data (can be NULL) */ - void (*destroy)(void *ext); + void (*destroy)(struct ip_set *set, void *ext); enum ip_set_extension type; enum ipset_cadt_flags flag; /* Size and minimal alignment */ @@ -92,17 +94,6 @@ struct ip_set_ext_type { extern const struct ip_set_ext_type ip_set_extensions[]; -struct ip_set_ext { - u64 packets; - u64 bytes; - u32 timeout; - u32 skbmark; - u32 skbmarkmask; - u32 skbprio; - u16 skbqueue; - char *comment; -}; - struct ip_set_counter { atomic64_t bytes; atomic64_t packets; @@ -122,6 +113,15 @@ struct ip_set_skbinfo { u32 skbmarkmask; u32 skbprio; u16 skbqueue; + u16 __pad; +}; + +struct ip_set_ext { + struct ip_set_skbinfo skbinfo; + u64 packets; + u64 bytes; + char *comment; + u32 timeout; }; struct ip_set; @@ -252,6 +252,10 @@ struct ip_set { u8 flags; /* Default timeout value, if enabled */ u32 timeout; + /* Number of elements (vs timeout) */ + u32 elements; + /* Size of the dynamic extensions (vs timeout) */ + size_t ext_size; /* Element data size */ size_t dsize; /* Offsets to extensions in elements */ @@ -268,7 +272,7 @@ ip_set_ext_destroy(struct ip_set *set, void *data) */ if (SET_WITH_COMMENT(set)) ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy( - ext_comment(data, set)); + set, ext_comment(data, set)); } static inline int @@ -294,104 +298,6 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags)); } -static inline void -ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) -{ - atomic64_add((long long)bytes, &(counter)->bytes); -} - -static inline void -ip_set_add_packets(u64 packets, struct ip_set_counter *counter) -{ - atomic64_add((long long)packets, &(counter)->packets); -} - -static inline u64 -ip_set_get_bytes(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->bytes); -} - -static inline u64 -ip_set_get_packets(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->packets); -} - -static inline void -ip_set_update_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) -{ - if (ext->packets != ULLONG_MAX && - !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { - ip_set_add_bytes(ext->bytes, counter); - ip_set_add_packets(ext->packets, counter); - } - if (flags & IPSET_FLAG_MATCH_COUNTERS) { - mext->packets = ip_set_get_packets(counter); - mext->bytes = ip_set_get_bytes(counter); - } -} - -static inline void -ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) -{ - mext->skbmark = skbinfo->skbmark; - mext->skbmarkmask = skbinfo->skbmarkmask; - mext->skbprio = skbinfo->skbprio; - mext->skbqueue = skbinfo->skbqueue; -} -static inline bool -ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) -{ - /* Send nonzero parameters only */ - return ((skbinfo->skbmark || skbinfo->skbmarkmask) && - nla_put_net64(skb, IPSET_ATTR_SKBMARK, - cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask), - IPSET_ATTR_PAD)) || - (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, - cpu_to_be32(skbinfo->skbprio))) || - (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, - cpu_to_be16(skbinfo->skbqueue))); -} - -static inline void -ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext) -{ - skbinfo->skbmark = ext->skbmark; - skbinfo->skbmarkmask = ext->skbmarkmask; - skbinfo->skbprio = ext->skbprio; - skbinfo->skbqueue = ext->skbqueue; -} - -static inline bool -ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter) -{ - return nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter)), - IPSET_ATTR_PAD) || - nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter)), - IPSET_ATTR_PAD); -} - -static inline void -ip_set_init_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext) -{ - if (ext->bytes != ULLONG_MAX) - atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); - if (ext->packets != ULLONG_MAX) - atomic64_set(&(counter)->packets, (long long)(ext->packets)); -} - /* Netlink CB args */ enum { IPSET_CB_NET = 0, /* net namespace */ @@ -431,6 +337,8 @@ extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, size_t align); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); +extern int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, + const void *e, bool active); static inline int ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr) @@ -546,10 +454,8 @@ bitmap_bytes(u32 a, u32 b) #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_comment.h> - -int -ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, - const void *e, bool active); +#include <linux/netfilter/ipset/ip_set_counter.h> +#include <linux/netfilter/ipset/ip_set_skbinfo.h> #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h index 5e4662a71e01..366d6c0ea04f 100644 --- a/include/linux/netfilter/ipset/ip_set_bitmap.h +++ b/include/linux/netfilter/ipset/ip_set_bitmap.h @@ -6,8 +6,8 @@ #define IPSET_BITMAP_MAX_RANGE 0x0000FFFF enum { + IPSET_ADD_STORE_PLAIN_TIMEOUT = -1, IPSET_ADD_FAILED = 1, - IPSET_ADD_STORE_PLAIN_TIMEOUT, IPSET_ADD_START_STORED_TIMEOUT, }; diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 8d0248525957..8e2bab1e8e90 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -20,13 +20,14 @@ ip_set_comment_uget(struct nlattr *tb) * The kadt functions don't use the comment extensions in any way. */ static inline void -ip_set_init_comment(struct ip_set_comment *comment, +ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, const struct ip_set_ext *ext) { struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); size_t len = ext->comment ? strlen(ext->comment) : 0; if (unlikely(c)) { + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } @@ -34,16 +35,17 @@ ip_set_init_comment(struct ip_set_comment *comment, return; if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) len = IPSET_MAX_COMMENT_SIZE; - c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC); + c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC); if (unlikely(!c)) return; strlcpy(c->str, ext->comment, len + 1); + set->ext_size += sizeof(*c) + strlen(c->str) + 1; rcu_assign_pointer(comment->c, c); } /* Used only when dumping a set, protected by rcu_read_lock_bh() */ static inline int -ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) +ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) { struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); @@ -58,13 +60,14 @@ ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) * of the set data anymore. */ static inline void -ip_set_comment_free(struct ip_set_comment *comment) +ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment) { struct ip_set_comment_rcu *c; c = rcu_dereference_protected(comment->c, 1); if (unlikely(!c)) return; + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } diff --git a/include/linux/netfilter/ipset/ip_set_counter.h b/include/linux/netfilter/ipset/ip_set_counter.h new file mode 100644 index 000000000000..bb6fba480118 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_counter.h @@ -0,0 +1,75 @@ +#ifndef _IP_SET_COUNTER_H +#define _IP_SET_COUNTER_H + +/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +static inline void +ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) +{ + atomic64_add((long long)bytes, &(counter)->bytes); +} + +static inline void +ip_set_add_packets(u64 packets, struct ip_set_counter *counter) +{ + atomic64_add((long long)packets, &(counter)->packets); +} + +static inline u64 +ip_set_get_bytes(const struct ip_set_counter *counter) +{ + return (u64)atomic64_read(&(counter)->bytes); +} + +static inline u64 +ip_set_get_packets(const struct ip_set_counter *counter) +{ + return (u64)atomic64_read(&(counter)->packets); +} + +static inline void +ip_set_update_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + if (ext->packets != ULLONG_MAX && + !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { + ip_set_add_bytes(ext->bytes, counter); + ip_set_add_packets(ext->packets, counter); + } + if (flags & IPSET_FLAG_MATCH_COUNTERS) { + mext->packets = ip_set_get_packets(counter); + mext->bytes = ip_set_get_bytes(counter); + } +} + +static inline bool +ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) +{ + return nla_put_net64(skb, IPSET_ATTR_BYTES, + cpu_to_be64(ip_set_get_bytes(counter)), + IPSET_ATTR_PAD) || + nla_put_net64(skb, IPSET_ATTR_PACKETS, + cpu_to_be64(ip_set_get_packets(counter)), + IPSET_ATTR_PAD); +} + +static inline void +ip_set_init_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext) +{ + if (ext->bytes != ULLONG_MAX) + atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); + if (ext->packets != ULLONG_MAX) + atomic64_set(&(counter)->packets, (long long)(ext->packets)); +} + +#endif /* __KERNEL__ */ +#endif /* _IP_SET_COUNTER_H */ diff --git a/include/linux/netfilter/ipset/ip_set_skbinfo.h b/include/linux/netfilter/ipset/ip_set_skbinfo.h new file mode 100644 index 000000000000..29d7ef2bc3fa --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_skbinfo.h @@ -0,0 +1,46 @@ +#ifndef _IP_SET_SKBINFO_H +#define _IP_SET_SKBINFO_H + +/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +static inline void +ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + mext->skbinfo = *skbinfo; +} + +static inline bool +ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) +{ + /* Send nonzero parameters only */ + return ((skbinfo->skbmark || skbinfo->skbmarkmask) && + nla_put_net64(skb, IPSET_ATTR_SKBMARK, + cpu_to_be64((u64)skbinfo->skbmark << 32 | + skbinfo->skbmarkmask), + IPSET_ATTR_PAD)) || + (skbinfo->skbprio && + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + cpu_to_be32(skbinfo->skbprio))) || + (skbinfo->skbqueue && + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + cpu_to_be16(skbinfo->skbqueue))); +} + +static inline void +ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext) +{ + *skbinfo = ext->skbinfo; +} + +#endif /* __KERNEL__ */ +#endif /* _IP_SET_SKBINFO_H */ diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 1d6a935c1ac5..bfb3531fd88a 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,7 +40,7 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_expired(unsigned long *t) +ip_set_timeout_expired(const unsigned long *t) { return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t); } @@ -63,7 +63,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 value) } static inline u32 -ip_set_timeout_get(unsigned long *timeout) +ip_set_timeout_get(const unsigned long *timeout) { return *timeout == IPSET_ELEM_PERMANENT ? 0 : jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h index 40dcc82058d1..ff721d7325cf 100644 --- a/include/linux/netfilter/nf_conntrack_dccp.h +++ b/include/linux/netfilter/nf_conntrack_dccp.h @@ -25,7 +25,7 @@ enum ct_dccp_roles { #define CT_DCCP_ROLE_MAX (__CT_DCCP_ROLE_MAX - 1) #ifdef __KERNEL__ -#include <net/netfilter/nf_conntrack_tuple.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> struct nf_ct_dccp { u_int8_t role[IP_CT_DIR_MAX]; diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 2ad1a2b289b5..5117e4d2ddfa 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -4,6 +4,7 @@ #include <linux/netdevice.h> #include <linux/static_key.h> +#include <linux/netfilter.h> #include <uapi/linux/netfilter/x_tables.h> /* Test a struct->invflags and a boolean for inequality */ @@ -17,14 +18,9 @@ * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data - * @net network namespace through which the action was invoked - * @in: input netdevice - * @out: output netdevice + * @state: pointer to hook state this packet came from * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data - * @hook: hook number given packet came from - * @family: Actual NFPROTO_* through which the function is invoked - * (helpful when match->family == NFPROTO_UNSPEC) * * Fields written to by extensions: * @@ -38,15 +34,47 @@ struct xt_action_param { union { const void *matchinfo, *targinfo; }; - struct net *net; - const struct net_device *in, *out; + const struct nf_hook_state *state; int fragoff; unsigned int thoff; - unsigned int hooknum; - u_int8_t family; bool hotdrop; }; +static inline struct net *xt_net(const struct xt_action_param *par) +{ + return par->state->net; +} + +static inline struct net_device *xt_in(const struct xt_action_param *par) +{ + return par->state->in; +} + +static inline const char *xt_inname(const struct xt_action_param *par) +{ + return par->state->in->name; +} + +static inline struct net_device *xt_out(const struct xt_action_param *par) +{ + return par->state->out; +} + +static inline const char *xt_outname(const struct xt_action_param *par) +{ + return par->state->out->name; +} + +static inline unsigned int xt_hooknum(const struct xt_action_param *par) +{ + return par->state->hook; +} + +static inline u_int8_t xt_family(const struct xt_action_param *par) +{ + return par->state->pf; +} + /** * struct xt_mtchk_param - parameters for match extensions' * checkentry functions @@ -375,38 +403,14 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } +struct xt_percpu_counter_alloc_state { + unsigned int off; + const char __percpu *mem; +}; -/* On SMP, ip(6)t_entry->counters.pcnt holds address of the - * real (percpu) counter. On !SMP, its just the packet count, - * so nothing needs to be done there. - * - * xt_percpu_counter_alloc returns the address of the percpu - * counter, or 0 on !SMP. We force an alignment of 16 bytes - * so that bytes/packets share a common cache line. - * - * Hence caller must use IS_ERR_VALUE to check for error, this - * allows us to return 0 for single core systems without forcing - * callers to deal with SMP vs. NONSMP issues. - */ -static inline unsigned long xt_percpu_counter_alloc(void) -{ - if (nr_cpu_ids > 1) { - void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), - sizeof(struct xt_counters)); - - if (res == NULL) - return -ENOMEM; - - return (__force unsigned long) res; - } - - return 0; -} -static inline void xt_percpu_counter_free(u64 pcnt) -{ - if (nr_cpu_ids > 1) - free_percpu((void __percpu *) (unsigned long) pcnt); -} +bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, + struct xt_counters *counter); +void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 33e37fb41d5d..59476061de86 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -19,6 +19,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; + int ret; /* Must recheck the ingress hook head, in the event it became NULL * after the check in nf_hook_ingress_active evaluated to true. @@ -26,10 +27,14 @@ static inline int nf_hook_ingress(struct sk_buff *skb) if (unlikely(!e)) return 0; - nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN, + nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - return nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, e); + if (ret == 0) + return -1; + + return ret; } static inline void nf_hook_ingress_init(struct net_device *dev) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index b25ee9ffdbe6..1828900c9411 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -78,8 +78,11 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi) struct net_device *dev = napi->dev; if (dev && dev->npinfo) { - spin_lock(&napi->poll_lock); - napi->poll_owner = smp_processor_id(); + int owner = smp_processor_id(); + + while (cmpxchg(&napi->poll_owner, -1, owner) != -1) + cpu_relax(); + return napi; } return NULL; @@ -89,10 +92,8 @@ static inline void netpoll_poll_unlock(void *have) { struct napi_struct *napi = have; - if (napi) { - napi->poll_owner = -1; - spin_unlock(&napi->poll_lock); - } + if (napi) + smp_store_release(&napi->poll_owner, -1); } static inline bool netpoll_tx_running(struct net_device *dev) diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 4341f32516d8..271b3fdf0070 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -71,6 +71,7 @@ extern int early_init_dt_scan_chosen_stdout(void); extern void early_init_fdt_scan_reserved_mem(void); extern void early_init_fdt_reserve_self(void); extern void early_init_dt_add_memory_arch(u64 base, u64 size); +extern int early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size); extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, bool no_map); extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 2ab233661ae5..a58cca8bcb29 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -29,6 +29,7 @@ struct phy_device *of_phy_attach(struct net_device *dev, extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np); extern int of_phy_register_fixed_link(struct device_node *np); +extern void of_phy_deregister_fixed_link(struct device_node *np); extern bool of_phy_is_fixed_link(struct device_node *np); #else /* CONFIG_OF */ @@ -83,6 +84,9 @@ static inline int of_phy_register_fixed_link(struct device_node *np) { return -ENOSYS; } +static inline void of_phy_deregister_fixed_link(struct device_node *np) +{ +} static inline bool of_phy_is_fixed_link(struct device_node *np) { return false; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index dd15d39e1985..7dbe9148b2f8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -374,16 +374,13 @@ static inline struct page *read_mapping_page(struct address_space *mapping, } /* - * Get the offset in PAGE_SIZE. - * (TODO: hugepage should have ->index in PAGE_SIZE) + * Get index of the page with in radix-tree + * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) */ -static inline pgoff_t page_to_pgoff(struct page *page) +static inline pgoff_t page_to_index(struct page *page) { pgoff_t pgoff; - if (unlikely(PageHeadHuge(page))) - return page->index << compound_order(page); - if (likely(!PageTransTail(page))) return page->index; @@ -397,6 +394,18 @@ static inline pgoff_t page_to_pgoff(struct page *page) } /* + * Get the offset in PAGE_SIZE. + * (TODO: hugepage should have ->index in PAGE_SIZE) + */ +static inline pgoff_t page_to_pgoff(struct page *page) +{ + if (unlikely(PageHeadHuge(page))) + return page->index << compound_order(page); + + return page_to_index(page); +} + +/* * Return byte-offset into filesystem object for page. */ static inline loff_t page_offset(struct page *page) diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e49f70dbd9b..30d6c162e053 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -244,6 +244,7 @@ struct pci_cap_saved_state { struct pci_cap_saved_data cap; }; +struct irq_affinity; struct pcie_link_state; struct pci_vpd; struct pci_sriov; @@ -332,7 +333,6 @@ struct pci_dev { * directly, use the values stored here. They might be different! */ unsigned int irq; - struct cpumask *irq_affinity; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ bool match_driver; /* Skip attaching driver */ @@ -1310,8 +1310,10 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, return rc; return 0; } -int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, - unsigned int max_vecs, unsigned int flags); +int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags, + const struct irq_affinity *affd); + void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); @@ -1339,14 +1341,17 @@ static inline int pci_enable_msix_range(struct pci_dev *dev, static inline int pci_enable_msix_exact(struct pci_dev *dev, struct msix_entry *entries, int nvec) { return -ENOSYS; } -static inline int pci_alloc_irq_vectors(struct pci_dev *dev, - unsigned int min_vecs, unsigned int max_vecs, - unsigned int flags) + +static inline int +pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags, + const struct irq_affinity *aff_desc) { if (min_vecs > 1) return -EINVAL; return 1; } + static inline void pci_free_irq_vectors(struct pci_dev *dev) { } @@ -1364,6 +1369,14 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, } #endif +static inline int +pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags) +{ + return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags, + NULL); +} + #ifdef CONFIG_PCIEPORTBUS extern bool pcie_ports_disabled; extern bool pcie_ports_auto; @@ -1928,6 +1941,20 @@ static inline int pci_pcie_type(const struct pci_dev *dev) return (pcie_caps_reg(dev) & PCI_EXP_FLAGS_TYPE) >> 4; } +static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) +{ + while (1) { + if (!pci_is_pcie(dev)) + break; + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + return dev; + if (!dev->bus->self) + break; + dev = dev->bus->self; + } + return NULL; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, diff --git a/include/linux/phy.h b/include/linux/phy.h index e25f1830fbcf..f7d95f644eed 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -25,6 +25,7 @@ #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/mod_devicetable.h> +#include <linux/phy_led_triggers.h> #include <linux/atomic.h> @@ -85,6 +86,21 @@ typedef enum { } phy_interface_t; /** + * phy_supported_speeds - return all speeds currently supported by a phy device + * @phy: The phy device to return supported speeds of. + * @speeds: buffer to store supported speeds in. + * @size: size of speeds buffer. + * + * Description: Returns the number of supported speeds, and + * fills the speeds * buffer with the supported speeds. If speeds buffer is + * too small to contain * all currently supported speeds, will return as + * many speeds as can fit. + */ +unsigned int phy_supported_speeds(struct phy_device *phy, + unsigned int *speeds, + unsigned int size); + +/** * It maps 'enum phy_interface_t' found in include/linux/phy.h * into the device tree binding of 'phy-mode', so that Ethernet * device driver can get phy interface from device tree. @@ -343,7 +359,7 @@ struct phy_c45_device_ids { * giving up on the current attempt at acquiring a link * irq: IRQ number of the PHY's interrupt (-1 if none) * phy_timer: The timer for handling the state machine - * phy_queue: A work_queue for the interrupt + * phy_queue: A work_queue for the phy_mac_interrupt * attached_dev: The attached enet driver's device instance ptr * adjust_link: Callback for the enet controller to respond to * changes in the link state. @@ -401,10 +417,19 @@ struct phy_device { u32 advertising; u32 lp_advertising; + /* Energy efficient ethernet modes which should be prohibited */ + u32 eee_broken_modes; + int autoneg; int link_timeout; +#ifdef CONFIG_LED_TRIGGER_PHY + struct phy_led_trigger *phy_led_triggers; + unsigned int phy_num_led_triggers; + struct phy_led_trigger *last_triggered; +#endif + /* * Interrupt number for this PHY * -1 means no interrupt @@ -425,6 +450,7 @@ struct phy_device { struct net_device *attached_dev; u8 mdix; + u8 mdix_ctrl; void (*adjust_link)(struct net_device *dev); }; @@ -589,6 +615,13 @@ struct phy_driver { void (*get_strings)(struct phy_device *dev, u8 *data); void (*get_stats)(struct phy_device *dev, struct ethtool_stats *stats, u64 *data); + + /* Get and Set PHY tunables */ + int (*get_tunable)(struct phy_device *dev, + struct ethtool_tunable *tuna, void *data); + int (*set_tunable)(struct phy_device *dev, + struct ethtool_tunable *tuna, + const void *data); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -764,6 +797,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); +int phy_aneg_done(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); @@ -802,7 +836,8 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_state_machine(struct work_struct *work); -void phy_change(struct work_struct *work); +void phy_change(struct phy_device *phydev); +void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); @@ -825,6 +860,10 @@ int phy_register_fixup_for_id(const char *bus_id, int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); +int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask); +int phy_unregister_fixup_for_id(const char *bus_id); +int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); + int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); @@ -836,6 +875,7 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev, struct ethtool_link_ksettings *cmd); int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); +int phy_ethtool_nway_reset(struct net_device *ndev); int __init mdio_bus_init(void); void mdio_bus_exit(void); diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h new file mode 100644 index 000000000000..a2daea0a37d2 --- /dev/null +++ b/include/linux/phy_led_triggers.h @@ -0,0 +1,51 @@ +/* Copyright (C) 2016 National Instruments Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef __PHY_LED_TRIGGERS +#define __PHY_LED_TRIGGERS + +struct phy_device; + +#ifdef CONFIG_LED_TRIGGER_PHY + +#include <linux/leds.h> + +#define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 10 +#define PHY_MII_BUS_ID_SIZE (20 - 3) + +#define PHY_LINK_LED_TRIGGER_NAME_SIZE (PHY_MII_BUS_ID_SIZE + \ + FIELD_SIZEOF(struct mdio_device, addr)+\ + PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE) + +struct phy_led_trigger { + struct led_trigger trigger; + char name[PHY_LINK_LED_TRIGGER_NAME_SIZE]; + unsigned int speed; +}; + + +extern int phy_led_triggers_register(struct phy_device *phy); +extern void phy_led_triggers_unregister(struct phy_device *phy); +extern void phy_led_trigger_change_speed(struct phy_device *phy); + +#else + +static inline int phy_led_triggers_register(struct phy_device *phy) +{ + return 0; +} +static inline void phy_led_triggers_unregister(struct phy_device *phy) { } +static inline void phy_led_trigger_change_speed(struct phy_device *phy) { } + +#endif + +#endif diff --git a/include/linux/pim.h b/include/linux/pim.h index e1d756f81348..0e81b2778ae0 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -1,6 +1,7 @@ #ifndef __LINUX_PIM_H #define __LINUX_PIM_H +#include <linux/skbuff.h> #include <asm/byteorder.h> /* Message types - V1 */ @@ -9,24 +10,86 @@ /* Message types - V2 */ #define PIM_VERSION 2 -#define PIM_REGISTER 1 + +/* RFC7761, sec 4.9: + * Type + * Types for specific PIM messages. PIM Types are: + * + * Message Type Destination + * --------------------------------------------------------------------- + * 0 = Hello Multicast to ALL-PIM-ROUTERS + * 1 = Register Unicast to RP + * 2 = Register-Stop Unicast to source of Register + * packet + * 3 = Join/Prune Multicast to ALL-PIM-ROUTERS + * 4 = Bootstrap Multicast to ALL-PIM-ROUTERS + * 5 = Assert Multicast to ALL-PIM-ROUTERS + * 6 = Graft (used in PIM-DM only) Unicast to RPF'(S) + * 7 = Graft-Ack (used in PIM-DM only) Unicast to source of Graft + * packet + * 8 = Candidate-RP-Advertisement Unicast to Domain's BSR + */ +enum { + PIM_TYPE_HELLO, + PIM_TYPE_REGISTER, + PIM_TYPE_REGISTER_STOP, + PIM_TYPE_JOIN_PRUNE, + PIM_TYPE_BOOTSTRAP, + PIM_TYPE_ASSERT, + PIM_TYPE_GRAFT, + PIM_TYPE_GRAFT_ACK, + PIM_TYPE_CANDIDATE_RP_ADV +}; #define PIM_NULL_REGISTER cpu_to_be32(0x40000000) -static inline bool ipmr_pimsm_enabled(void) -{ - return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); -} +/* RFC7761, sec 4.9: + * The PIM header common to all PIM messages is: + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |PIM Ver| Type | Reserved | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct pimhdr { + __u8 type; + __u8 reserved; + __be16 csum; +}; /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ -struct pimreghdr -{ +struct pimreghdr { __u8 type; __u8 reserved; __be16 csum; __be32 flags; }; -struct sk_buff; -extern int pim_rcv_v1(struct sk_buff *); +int pim_rcv_v1(struct sk_buff *skb); + +static inline bool ipmr_pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + +static inline struct pimhdr *pim_hdr(const struct sk_buff *skb) +{ + return (struct pimhdr *)skb_transport_header(skb); +} + +static inline u8 pim_hdr_version(const struct pimhdr *pimhdr) +{ + return pimhdr->type >> 4; +} + +static inline u8 pim_hdr_type(const struct pimhdr *pimhdr) +{ + return pimhdr->type & 0xf; +} + +/* check if the address is 224.0.0.13, RFC7761 sec 4.3.1 */ +static inline bool pim_ipv4_all_pim_routers(__be32 addr) +{ + return addr == htonl(0xE000000D); +} #endif diff --git a/include/linux/pm-trace.h b/include/linux/pm-trace.h index ecbde7a5548e..7b78793f07d7 100644 --- a/include/linux/pm-trace.h +++ b/include/linux/pm-trace.h @@ -1,11 +1,17 @@ #ifndef PM_TRACE_H #define PM_TRACE_H +#include <linux/types.h> #ifdef CONFIG_PM_TRACE #include <asm/pm-trace.h> -#include <linux/types.h> extern int pm_trace_enabled; +extern bool pm_trace_rtc_abused; + +static inline bool pm_trace_rtc_valid(void) +{ + return !pm_trace_rtc_abused; +} static inline int pm_trace_is_enabled(void) { @@ -24,6 +30,7 @@ extern int show_trace_dev_match(char *buf, size_t size); #else +static inline bool pm_trace_rtc_valid(void) { return true; } static inline int pm_trace_is_enabled(void) { return 0; } #define TRACE_DEVICE(dev) do { } while (0) diff --git a/include/linux/pm.h b/include/linux/pm.h index 06eb353182ab..efa67b2dfee9 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -258,7 +258,7 @@ typedef struct pm_message { * example, if it detects that a child was unplugged while the system was * asleep). * - * Refer to Documentation/power/devices.txt for more information about the role + * Refer to Documentation/power/admin-guide/devices.rst for more information about the role * of the above callbacks in the system suspend process. * * There also are callbacks related to runtime power management of devices. diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 75e4e30677f1..7eeceac52dea 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -65,19 +65,24 @@ /* * Are we doing bottom half or hardware interrupt processing? - * Are we in a softirq context? Interrupt context? - * in_softirq - Are we currently processing softirq or have bh disabled? - * in_serving_softirq - Are we currently processing softirq? + * + * in_irq() - We're in (hard) IRQ context + * in_softirq() - We have BH disabled, or are processing softirqs + * in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled + * in_serving_softirq() - We're in softirq context + * in_nmi() - We're in NMI context + * in_task() - We're in task context + * + * Note: due to the BH disabled confusion: in_softirq(),in_interrupt() really + * should not be used in new code. */ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) - -/* - * Are we in NMI context? - */ -#define in_nmi() (preempt_count() & NMI_MASK) +#define in_nmi() (preempt_count() & NMI_MASK) +#define in_task() (!(preempt_count() & \ + (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) /* * The preempt_count offset after preempt_disable(); diff --git a/include/linux/printk.h b/include/linux/printk.h index eac1af8502bb..3472cc6b7a60 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -10,6 +10,8 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; +#define PRINTK_MAX_SINGLE_HEADER_LEN 2 + static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { @@ -31,6 +33,14 @@ static inline const char *printk_skip_level(const char *buffer) return buffer; } +static inline const char *printk_skip_headers(const char *buffer) +{ + while (printk_get_level(buffer)) + buffer = printk_skip_level(buffer); + + return buffer; +} + #define CONSOLE_EXT_LOG_MAX 8192 /* printk's without a loglevel use this.. */ @@ -40,10 +50,15 @@ static inline const char *printk_skip_level(const char *buffer) #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ #define CONSOLE_LOGLEVEL_QUIET 4 /* Shhh ..., when booted with "quiet" */ -#define CONSOLE_LOGLEVEL_DEFAULT 7 /* anything MORE serious than KERN_DEBUG */ #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ +/* + * Default used to be hard-coded at 7, we're now allowing it to be set from + * kernel config. + */ +#define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT + extern int console_printk[]; #define console_loglevel (console_printk[0]) diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index b97bf2ef996e..368c7ad06ae5 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -82,4 +82,8 @@ static inline struct proc_dir_entry *proc_net_mkdir( return proc_mkdir_data(name, 0, parent, net); } +struct ns_common; +int open_related_ns(struct ns_common *ns, + struct ns_common *(*get_ns)(struct ns_common *ns)); + #endif /* _LINUX_PROC_FS_H */ diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 5ad54fc66cf0..a026bfd089db 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -58,7 +58,14 @@ struct system_device_crosststamp; * * clock operations * + * @adjfine: Adjusts the frequency of the hardware clock. + * parameter scaled_ppm: Desired frequency offset from + * nominal frequency in parts per million, but with a + * 16 bit binary fractional field. + * * @adjfreq: Adjusts the frequency of the hardware clock. + * This method is deprecated. New drivers should implement + * the @adjfine method instead. * parameter delta: Desired frequency offset from nominal frequency * in parts per billion * @@ -108,6 +115,7 @@ struct ptp_clock_info { int n_pins; int pps; struct ptp_pin_desc *pin_config; + int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); @@ -122,30 +130,6 @@ struct ptp_clock_info { struct ptp_clock; -/** - * ptp_clock_register() - register a PTP hardware clock driver - * - * @info: Structure describing the new clock. - * @parent: Pointer to the parent device of the new clock. - * - * Returns a valid pointer on success or PTR_ERR on failure. If PHC - * support is missing at the configuration level, this function - * returns NULL, and drivers are expected to gracefully handle that - * case separately. - */ - -extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, - struct device *parent); - -/** - * ptp_clock_unregister() - unregister a PTP hardware clock driver - * - * @ptp: The clock to remove from service. - */ - -extern int ptp_clock_unregister(struct ptp_clock *ptp); - - enum ptp_clock_events { PTP_CLOCK_ALARM, PTP_CLOCK_EXTTS, @@ -171,6 +155,31 @@ struct ptp_clock_event { }; }; +#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) + +/** + * ptp_clock_register() - register a PTP hardware clock driver + * + * @info: Structure describing the new clock. + * @parent: Pointer to the parent device of the new clock. + * + * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * support is missing at the configuration level, this function + * returns NULL, and drivers are expected to gracefully handle that + * case separately. + */ + +extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, + struct device *parent); + +/** + * ptp_clock_unregister() - unregister a PTP hardware clock driver + * + * @ptp: The clock to remove from service. + */ + +extern int ptp_clock_unregister(struct ptp_clock *ptp); + /** * ptp_clock_event() - notify the PTP layer about an event * @@ -202,4 +211,20 @@ extern int ptp_clock_index(struct ptp_clock *ptp); int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan); +#else +static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, + struct device *parent) +{ return NULL; } +static inline int ptp_clock_unregister(struct ptp_clock *ptp) +{ return 0; } +static inline void ptp_clock_event(struct ptp_clock *ptp, + struct ptp_clock_event *event) +{ } +static inline int ptp_clock_index(struct ptp_clock *ptp) +{ return -1; } +static inline int ptp_find_pin(struct ptp_clock *ptp, + enum ptp_pin_function func, unsigned int chan) +{ return -1; } +#endif + #endif diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 72d88cf3ca25..37dfba101c6c 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -56,23 +56,6 @@ struct qed_chain_pbl_u32 { u32 cons_page_idx; }; -struct qed_chain_pbl { - /* Base address of a pre-allocated buffer for pbl */ - dma_addr_t p_phys_table; - void *p_virt_table; - - /* Table for keeping the virtual addresses of the chain pages, - * respectively to the physical addresses in the pbl table. - */ - void **pp_virt_addr_tbl; - - /* Index to current used page by producer/consumer */ - union { - struct qed_chain_pbl_u16 pbl16; - struct qed_chain_pbl_u32 pbl32; - } u; -}; - struct qed_chain_u16 { /* Cyclic index of next element to produce/consme */ u16 prod_idx; @@ -86,46 +69,78 @@ struct qed_chain_u32 { }; struct qed_chain { - void *p_virt_addr; - dma_addr_t p_phys_addr; - void *p_prod_elem; - void *p_cons_elem; + /* fastpath portion of the chain - required for commands such + * as produce / consume. + */ + /* Point to next element to produce/consume */ + void *p_prod_elem; + void *p_cons_elem; + + /* Fastpath portions of the PBL [if exists] */ + struct { + /* Table for keeping the virtual addresses of the chain pages, + * respectively to the physical addresses in the pbl table. + */ + void **pp_virt_addr_tbl; - enum qed_chain_mode mode; - enum qed_chain_use_mode intended_use; /* used to produce/consume */ - enum qed_chain_cnt_type cnt_type; + union { + struct qed_chain_pbl_u16 u16; + struct qed_chain_pbl_u32 u32; + } c; + } pbl; union { struct qed_chain_u16 chain16; struct qed_chain_u32 chain32; } u; + /* Capacity counts only usable elements */ + u32 capacity; u32 page_cnt; - /* Number of elements - capacity is for usable elements only, - * while size will contain total number of elements [for entire chain]. + enum qed_chain_mode mode; + + /* Elements information for fast calculations */ + u16 elem_per_page; + u16 elem_per_page_mask; + u16 elem_size; + u16 next_page_mask; + u16 usable_per_page; + u8 elem_unusable; + + u8 cnt_type; + + /* Slowpath of the chain - required for initialization and destruction, + * but isn't involved in regular functionality. */ - u32 capacity; + + /* Base address of a pre-allocated buffer for pbl */ + struct { + dma_addr_t p_phys_table; + void *p_virt_table; + } pbl_sp; + + /* Address of first page of the chain - the address is required + * for fastpath operation [consume/produce] but only for the the SINGLE + * flavour which isn't considered fastpath [== SPQ]. + */ + void *p_virt_addr; + dma_addr_t p_phys_addr; + + /* Total number of elements [for entire chain] */ u32 size; - /* Elements information for fast calculations */ - u16 elem_per_page; - u16 elem_per_page_mask; - u16 elem_unusable; - u16 usable_per_page; - u16 elem_size; - u16 next_page_mask; - struct qed_chain_pbl pbl; + u8 intended_use; }; #define QED_CHAIN_PBL_ENTRY_SIZE (8) #define QED_CHAIN_PAGE_SIZE (0x1000) #define ELEMS_PER_PAGE(elem_size) (QED_CHAIN_PAGE_SIZE / (elem_size)) -#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \ - ((mode == QED_CHAIN_MODE_NEXT_PTR) ? \ - (1 + ((sizeof(struct qed_chain_next) - 1) / \ - (elem_size))) : 0) +#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \ + (((mode) == QED_CHAIN_MODE_NEXT_PTR) ? \ + (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / \ + (elem_size))) : 0) #define USABLE_ELEMS_PER_PAGE(elem_size, mode) \ ((u32)(ELEMS_PER_PAGE(elem_size) - \ @@ -186,7 +201,7 @@ static inline u16 qed_chain_get_usable_per_page(struct qed_chain *p_chain) return p_chain->usable_per_page; } -static inline u16 qed_chain_get_unusable_per_page(struct qed_chain *p_chain) +static inline u8 qed_chain_get_unusable_per_page(struct qed_chain *p_chain) { return p_chain->elem_unusable; } @@ -198,7 +213,7 @@ static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain) static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain) { - return p_chain->pbl.p_phys_table; + return p_chain->pbl_sp.p_phys_table; } /** @@ -214,10 +229,10 @@ static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain) static inline void qed_chain_advance_page(struct qed_chain *p_chain, void **p_next_elem, void *idx_to_inc, void *page_to_inc) - { struct qed_chain_next *p_next = NULL; u32 page_index = 0; + switch (p_chain->mode) { case QED_CHAIN_MODE_NEXT_PTR: p_next = *p_next_elem; @@ -305,7 +320,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain) if ((p_chain->u.chain16.prod_idx & p_chain->elem_per_page_mask) == p_chain->next_page_mask) { p_prod_idx = &p_chain->u.chain16.prod_idx; - p_prod_page_idx = &p_chain->pbl.u.pbl16.prod_page_idx; + p_prod_page_idx = &p_chain->pbl.c.u16.prod_page_idx; qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, p_prod_idx, p_prod_page_idx); } @@ -314,7 +329,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain) if ((p_chain->u.chain32.prod_idx & p_chain->elem_per_page_mask) == p_chain->next_page_mask) { p_prod_idx = &p_chain->u.chain32.prod_idx; - p_prod_page_idx = &p_chain->pbl.u.pbl32.prod_page_idx; + p_prod_page_idx = &p_chain->pbl.c.u32.prod_page_idx; qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, p_prod_idx, p_prod_page_idx); } @@ -378,7 +393,7 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain) if ((p_chain->u.chain16.cons_idx & p_chain->elem_per_page_mask) == p_chain->next_page_mask) { p_cons_idx = &p_chain->u.chain16.cons_idx; - p_cons_page_idx = &p_chain->pbl.u.pbl16.cons_page_idx; + p_cons_page_idx = &p_chain->pbl.c.u16.cons_page_idx; qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, p_cons_idx, p_cons_page_idx); } @@ -387,8 +402,8 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain) if ((p_chain->u.chain32.cons_idx & p_chain->elem_per_page_mask) == p_chain->next_page_mask) { p_cons_idx = &p_chain->u.chain32.cons_idx; - p_cons_page_idx = &p_chain->pbl.u.pbl32.cons_page_idx; - qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, + p_cons_page_idx = &p_chain->pbl.c.u32.cons_page_idx; + qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, p_cons_idx, p_cons_page_idx); } p_chain->u.chain32.cons_idx++; @@ -429,25 +444,26 @@ static inline void qed_chain_reset(struct qed_chain *p_chain) u32 reset_val = p_chain->page_cnt - 1; if (is_chain_u16(p_chain)) { - p_chain->pbl.u.pbl16.prod_page_idx = (u16)reset_val; - p_chain->pbl.u.pbl16.cons_page_idx = (u16)reset_val; + p_chain->pbl.c.u16.prod_page_idx = (u16)reset_val; + p_chain->pbl.c.u16.cons_page_idx = (u16)reset_val; } else { - p_chain->pbl.u.pbl32.prod_page_idx = reset_val; - p_chain->pbl.u.pbl32.cons_page_idx = reset_val; + p_chain->pbl.c.u32.prod_page_idx = reset_val; + p_chain->pbl.c.u32.cons_page_idx = reset_val; } } switch (p_chain->intended_use) { - case QED_CHAIN_USE_TO_CONSUME_PRODUCE: - case QED_CHAIN_USE_TO_PRODUCE: - /* Do nothing */ - break; - case QED_CHAIN_USE_TO_CONSUME: /* produce empty elements */ for (i = 0; i < p_chain->capacity; i++) qed_chain_recycle_consumed(p_chain); break; + + case QED_CHAIN_USE_TO_CONSUME_PRODUCE: + case QED_CHAIN_USE_TO_PRODUCE: + default: + /* Do nothing */ + break; } } @@ -473,13 +489,13 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain, p_chain->p_virt_addr = NULL; p_chain->p_phys_addr = 0; p_chain->elem_size = elem_size; - p_chain->intended_use = intended_use; + p_chain->intended_use = (u8)intended_use; p_chain->mode = mode; - p_chain->cnt_type = cnt_type; + p_chain->cnt_type = (u8)cnt_type; - p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size); + p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size); p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode); - p_chain->elem_per_page_mask = p_chain->elem_per_page - 1; + p_chain->elem_per_page_mask = p_chain->elem_per_page - 1; p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode); p_chain->next_page_mask = (p_chain->usable_per_page & p_chain->elem_per_page_mask); @@ -488,8 +504,8 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain, p_chain->capacity = p_chain->usable_per_page * page_cnt; p_chain->size = p_chain->elem_per_page * page_cnt; - p_chain->pbl.p_phys_table = 0; - p_chain->pbl.p_virt_table = NULL; + p_chain->pbl_sp.p_phys_table = 0; + p_chain->pbl_sp.p_virt_table = NULL; p_chain->pbl.pp_virt_addr_tbl = NULL; } @@ -530,8 +546,8 @@ static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain, dma_addr_t p_phys_pbl, void **pp_virt_addr_tbl) { - p_chain->pbl.p_phys_table = p_phys_pbl; - p_chain->pbl.p_virt_table = p_virt_pbl; + p_chain->pbl_sp.p_phys_table = p_phys_pbl; + p_chain->pbl_sp.p_virt_table = p_virt_pbl; p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl; } diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 33c24ebc9b7f..7a52f7c58c37 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -15,6 +15,29 @@ #include <linux/qed/qed_if.h> #include <linux/qed/qed_iov_if.h> +struct qed_queue_start_common_params { + /* Should always be relative to entity sending this. */ + u8 vport_id; + u16 queue_id; + + /* Relative, but relevant only for PFs */ + u8 stats_id; + + /* These are always absolute */ + u16 sb; + u8 sb_idx; +}; + +struct qed_rxq_start_ret_params { + void __iomem *p_prod; + void *p_handle; +}; + +struct qed_txq_start_ret_params { + void __iomem *p_doorbell; + void *p_handle; +}; + struct qed_dev_eth_info { struct qed_dev_info common; @@ -22,7 +45,8 @@ struct qed_dev_eth_info { u8 num_tc; u8 port_mac[ETH_ALEN]; - u8 num_vlan_filters; + u16 num_vlan_filters; + u16 num_mac_filters; /* Legacy VF - this affects the datapath, so qede has to know */ bool is_legacy; @@ -55,18 +79,6 @@ struct qed_start_vport_params { bool clear_stats; }; -struct qed_stop_rxq_params { - u8 rss_id; - u8 rx_queue_id; - u8 vport_id; - bool eq_completion_only; -}; - -struct qed_stop_txq_params { - u8 rss_id; - u8 tx_queue_id; -}; - enum qed_filter_rx_mode_type { QED_FILTER_RX_MODE_TYPE_REGULAR, QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC, @@ -111,15 +123,6 @@ struct qed_filter_params { union qed_filter_type_params filter; }; -struct qed_queue_start_common_params { - u8 rss_id; - u8 queue_id; - u8 vport_id; - u16 sb; - u16 sb_idx; - u16 vf_qid; -}; - struct qed_tunn_params { u16 vxlan_port; u8 update_vxlan_port; @@ -129,7 +132,7 @@ struct qed_tunn_params { struct qed_eth_cb_ops { struct qed_common_cb_ops common; - void (*force_mac) (void *dev, u8 *mac); + void (*force_mac) (void *dev, u8 *mac, bool forced); }; #ifdef CONFIG_DCB @@ -219,24 +222,24 @@ struct qed_eth_ops { struct qed_update_vport_params *params); int (*q_rx_start)(struct qed_dev *cdev, + u8 rss_num, struct qed_queue_start_common_params *params, u16 bd_max_bytes, dma_addr_t bd_chain_phys_addr, dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size, - void __iomem **pp_prod); + struct qed_rxq_start_ret_params *ret_params); - int (*q_rx_stop)(struct qed_dev *cdev, - struct qed_stop_rxq_params *params); + int (*q_rx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle); int (*q_tx_start)(struct qed_dev *cdev, + u8 rss_num, struct qed_queue_start_common_params *params, dma_addr_t pbl_addr, u16 pbl_size, - void __iomem **pp_doorbell); + struct qed_txq_start_ret_params *ret_params); - int (*q_tx_stop)(struct qed_dev *cdev, - struct qed_stop_txq_params *params); + int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle); int (*filter_config)(struct qed_dev *cdev, struct qed_filter_params *params); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8978a60371f4..4b454f4f5b25 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -166,6 +166,7 @@ struct qed_iscsi_pf_params { u32 max_cwnd; u16 cq_num_entries; u16 cmdq_num_entries; + u32 two_msl_timer; u16 dup_ack_threshold; u16 tx_sws_timer; u16 min_rto; @@ -267,11 +268,15 @@ struct qed_dev_info { u8 mf_mode; bool tx_switching; bool rdma_supported; + u16 mtu; + + bool wol_support; }; enum qed_sb_type { QED_SB_TYPE_L2_QUEUE, QED_SB_TYPE_CNQ, + QED_SB_TYPE_STORAGE, }; enum qed_protocol { @@ -401,6 +406,15 @@ struct qed_selftest_ops { * @return 0 on success, error otherwise. */ int (*selftest_clock)(struct qed_dev *cdev); + +/** + * @brief selftest_nvram - Perform nvram test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_nvram) (struct qed_dev *cdev); }; struct qed_common_ops { @@ -554,6 +568,41 @@ struct qed_common_ops { */ int (*set_led)(struct qed_dev *cdev, enum qed_led_mode mode); + +/** + * @brief update_drv_state - API to inform the change in the driver state. + * + * @param cdev + * @param active + * + */ + int (*update_drv_state)(struct qed_dev *cdev, bool active); + +/** + * @brief update_mac - API to inform the change in the mac address + * + * @param cdev + * @param mac + * + */ + int (*update_mac)(struct qed_dev *cdev, u8 *mac); + +/** + * @brief update_mtu - API to inform the change in the mtu + * + * @param cdev + * @param mtu + * + */ + int (*update_mtu)(struct qed_dev *cdev, u16 mtu); + +/** + * @brief update_wol - update of changes in the WoL configuration + * + * @param cdev + * @param enabled - true iff WoL should be enabled. + */ + int (*update_wol) (struct qed_dev *cdev, bool enabled); }; #define MASK_FIELD(_name, _value) \ diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h new file mode 100644 index 000000000000..d27912480cb3 --- /dev/null +++ b/include/linux/qed/qed_iscsi_if.h @@ -0,0 +1,229 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_ISCSI_IF_H +#define _QED_ISCSI_IF_H +#include <linux/types.h> +#include <linux/qed/qed_if.h> + +typedef int (*iscsi_event_cb_t) (void *context, + u8 fw_event_code, void *fw_handle); +struct qed_iscsi_stats { + u64 iscsi_rx_bytes_cnt; + u64 iscsi_rx_packet_cnt; + u64 iscsi_rx_new_ooo_isle_events_cnt; + u32 iscsi_cmdq_threshold_cnt; + u32 iscsi_rq_threshold_cnt; + u32 iscsi_immq_threshold_cnt; + + u64 iscsi_rx_dropped_pdus_task_not_valid; + + u64 iscsi_rx_data_pdu_cnt; + u64 iscsi_rx_r2t_pdu_cnt; + u64 iscsi_rx_total_pdu_cnt; + + u64 iscsi_tx_go_to_slow_start_event_cnt; + u64 iscsi_tx_fast_retransmit_event_cnt; + + u64 iscsi_tx_data_pdu_cnt; + u64 iscsi_tx_r2t_pdu_cnt; + u64 iscsi_tx_total_pdu_cnt; + + u64 iscsi_tx_bytes_cnt; + u64 iscsi_tx_packet_cnt; +}; + +struct qed_dev_iscsi_info { + struct qed_dev_info common; + + void __iomem *primary_dbq_rq_addr; + void __iomem *secondary_bdq_rq_addr; +}; + +struct qed_iscsi_id_params { + u8 mac[ETH_ALEN]; + u32 ip[4]; + u16 port; +}; + +struct qed_iscsi_params_offload { + u8 layer_code; + dma_addr_t sq_pbl_addr; + u32 initial_ack; + + struct qed_iscsi_id_params src; + struct qed_iscsi_id_params dst; + u16 vlan_id; + u8 tcp_flags; + u8 ip_version; + u8 default_cq; + + u8 ka_max_probe_cnt; + u8 dup_ack_theshold; + u32 rcv_next; + u32 snd_una; + u32 snd_next; + u32 snd_max; + u32 snd_wnd; + u32 rcv_wnd; + u32 snd_wl1; + u32 cwnd; + u32 ss_thresh; + u16 srtt; + u16 rtt_var; + u32 ts_time; + u32 ts_recent; + u32 ts_recent_age; + u32 total_rt; + u32 ka_timeout_delta; + u32 rt_timeout_delta; + u8 dup_ack_cnt; + u8 snd_wnd_probe_cnt; + u8 ka_probe_cnt; + u8 rt_cnt; + u32 flow_label; + u32 ka_timeout; + u32 ka_interval; + u32 max_rt_time; + u32 initial_rcv_wnd; + u8 ttl; + u8 tos_or_tc; + u16 remote_port; + u16 local_port; + u16 mss; + u8 snd_wnd_scale; + u8 rcv_wnd_scale; + u32 ts_ticks_per_second; + u16 da_timeout_value; + u8 ack_frequency; +}; + +struct qed_iscsi_params_update { + u8 update_flag; +#define QED_ISCSI_CONN_HD_EN BIT(0) +#define QED_ISCSI_CONN_DD_EN BIT(1) +#define QED_ISCSI_CONN_INITIAL_R2T BIT(2) +#define QED_ISCSI_CONN_IMMEDIATE_DATA BIT(3) + + u32 max_seq_size; + u32 max_recv_pdu_length; + u32 max_send_pdu_length; + u32 first_seq_length; + u32 exp_stat_sn; +}; + +#define MAX_TID_BLOCKS_ISCSI (512) +struct qed_iscsi_tid { + u32 size; /* In bytes per task */ + u32 num_tids_per_block; + u8 *blocks[MAX_TID_BLOCKS_ISCSI]; +}; + +struct qed_iscsi_cb_ops { + struct qed_common_cb_ops common; +}; + +/** + * struct qed_iscsi_ops - qed iSCSI operations. + * @common: common operations pointer + * @ll2: light L2 operations pointer + * @fill_dev_info: fills iSCSI specific information + * @param cdev + * @param info + * @return 0 on sucesss, otherwise error value. + * @register_ops: register iscsi operations + * @param cdev + * @param ops - specified using qed_iscsi_cb_ops + * @param cookie - driver private + * @start: iscsi in FW + * @param cdev + * @param tasks - qed will fill information about tasks + * return 0 on success, otherwise error value. + * @stop: iscsi in FW + * @param cdev + * return 0 on success, otherwise error value. + * @acquire_conn: acquire a new iscsi connection + * @param cdev + * @param handle - qed will fill handle that should be + * used henceforth as identifier of the + * connection. + * @param p_doorbell - qed will fill the address of the + * doorbell. + * @return 0 on sucesss, otherwise error value. + * @release_conn: release a previously acquired iscsi connection + * @param cdev + * @param handle - the connection handle. + * @return 0 on success, otherwise error value. + * @offload_conn: configures an offloaded connection + * @param cdev + * @param handle - the connection handle. + * @param conn_info - the configuration to use for the + * offload. + * @return 0 on success, otherwise error value. + * @update_conn: updates an offloaded connection + * @param cdev + * @param handle - the connection handle. + * @param conn_info - the configuration to use for the + * offload. + * @return 0 on success, otherwise error value. + * @destroy_conn: stops an offloaded connection + * @param cdev + * @param handle - the connection handle. + * @return 0 on success, otherwise error value. + * @clear_sq: clear all task in sq + * @param cdev + * @param handle - the connection handle. + * @return 0 on success, otherwise error value. + * @get_stats: iSCSI related statistics + * @param cdev + * @param stats - pointer to struck that would be filled + * we stats + * @return 0 on success, error otherwise. + */ +struct qed_iscsi_ops { + const struct qed_common_ops *common; + + const struct qed_ll2_ops *ll2; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_iscsi_info *info); + + void (*register_ops)(struct qed_dev *cdev, + struct qed_iscsi_cb_ops *ops, void *cookie); + + int (*start)(struct qed_dev *cdev, + struct qed_iscsi_tid *tasks, + void *event_context, iscsi_event_cb_t async_event_cb); + + int (*stop)(struct qed_dev *cdev); + + int (*acquire_conn)(struct qed_dev *cdev, + u32 *handle, + u32 *fw_cid, void __iomem **p_doorbell); + + int (*release_conn)(struct qed_dev *cdev, u32 handle); + + int (*offload_conn)(struct qed_dev *cdev, + u32 handle, + struct qed_iscsi_params_offload *conn_info); + + int (*update_conn)(struct qed_dev *cdev, + u32 handle, + struct qed_iscsi_params_update *conn_info); + + int (*destroy_conn)(struct qed_dev *cdev, u32 handle, u8 abrt_conn); + + int (*clear_sq)(struct qed_dev *cdev, u32 handle); + + int (*get_stats)(struct qed_dev *cdev, + struct qed_iscsi_stats *stats); +}; + +const struct qed_iscsi_ops *qed_get_iscsi_ops(void); +void qed_put_iscsi_ops(void); +#endif diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index af3581b8a451..744486057e9e 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -80,14 +80,11 @@ static inline bool radix_tree_is_internal_node(void *ptr) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) -/* Internally used bits of node->count */ -#define RADIX_TREE_COUNT_SHIFT (RADIX_TREE_MAP_SHIFT + 1) -#define RADIX_TREE_COUNT_MASK ((1UL << RADIX_TREE_COUNT_SHIFT) - 1) - struct radix_tree_node { - unsigned char shift; /* Bits remaining in each slot */ - unsigned char offset; /* Slot offset in parent */ - unsigned int count; + unsigned char shift; /* Bits remaining in each slot */ + unsigned char offset; /* Slot offset in parent */ + unsigned char count; /* Total entry count */ + unsigned char exceptional; /* Exceptional entry count */ union { struct { /* Used when ascending tree */ @@ -248,20 +245,6 @@ static inline int radix_tree_exception(void *arg) return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); } -/** - * radix_tree_replace_slot - replace item in a slot - * @pslot: pointer to slot, returned by radix_tree_lookup_slot - * @item: new item to store in the slot. - * - * For use with radix_tree_lookup_slot(). Caller must hold tree write locked - * across slot lookup and replacement. - */ -static inline void radix_tree_replace_slot(void **pslot, void *item) -{ - BUG_ON(radix_tree_is_internal_node(item)); - rcu_assign_pointer(*pslot, item); -} - int __radix_tree_create(struct radix_tree_root *root, unsigned long index, unsigned order, struct radix_tree_node **nodep, void ***slotp); @@ -276,7 +259,14 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, void ***slotp); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); -bool __radix_tree_delete_node(struct radix_tree_root *root, +typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *); +void __radix_tree_replace(struct radix_tree_root *root, + struct radix_tree_node *node, + void **slot, void *item, + radix_tree_update_node_t update_node, void *private); +void radix_tree_replace_slot(struct radix_tree_root *root, + void **slot, void *item); +void __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *node); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8beb98dcf14f..4f7a9561b8c4 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -45,19 +45,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) * This is only for internal list manipulation where we know * the prev/next entries already! */ -#ifndef CONFIG_DEBUG_LIST static inline void __list_add_rcu(struct list_head *new, struct list_head *prev, struct list_head *next) { + if (!__list_add_valid(new, prev, next)) + return; + new->next = next; new->prev = prev; rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } -#else -void __list_add_rcu(struct list_head *new, - struct list_head *prev, struct list_head *next); -#endif /** * list_add_rcu - add a new entry to rcu-protected list diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 4acc552e9279..b6d4568795a7 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -198,4 +198,10 @@ enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; +#ifdef CONFIG_RING_BUFFER +int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node); +#else +#define trace_rb_cpu_prepare NULL +#endif + #endif /* _LINUX_RING_BUFFER_H */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b46bb5620a76..15321fb1df6b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -137,11 +137,19 @@ static inline void anon_vma_unlock_read(struct anon_vma *anon_vma) * anon_vma helper functions. */ void anon_vma_init(void); /* create anon_vma_cachep */ -int anon_vma_prepare(struct vm_area_struct *); +int __anon_vma_prepare(struct vm_area_struct *); void unlink_anon_vmas(struct vm_area_struct *); int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *); int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); +static inline int anon_vma_prepare(struct vm_area_struct *vma) +{ + if (likely(vma->anon_vma)) + return 0; + + return __anon_vma_prepare(vma); +} + static inline void anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 348f51b0ec92..0e90f2973719 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -262,20 +262,9 @@ extern char ___assert_task_state[1 - 2*!!( #define set_task_state(tsk, state_value) \ do { \ (tsk)->task_state_change = _THIS_IP_; \ - smp_store_mb((tsk)->state, (state_value)); \ + smp_store_mb((tsk)->state, (state_value)); \ } while (0) -/* - * set_current_state() includes a barrier so that the write of current->state - * is correctly serialised wrt the caller's subsequent test of whether to - * actually sleep: - * - * set_current_state(TASK_UNINTERRUPTIBLE); - * if (do_i_need_to_sleep()) - * schedule(); - * - * If the caller does not need such serialisation then use __set_current_state() - */ #define __set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ @@ -284,11 +273,19 @@ extern char ___assert_task_state[1 - 2*!!( #define set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ - smp_store_mb(current->state, (state_value)); \ + smp_store_mb(current->state, (state_value)); \ } while (0) #else +/* + * @tsk had better be current, or you get to keep the pieces. + * + * The only reason is that computing current can be more expensive than + * using a pointer that's already available. + * + * Therefore, see set_current_state(). + */ #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) #define set_task_state(tsk, state_value) \ @@ -299,11 +296,34 @@ extern char ___assert_task_state[1 - 2*!!( * is correctly serialised wrt the caller's subsequent test of whether to * actually sleep: * + * for (;;) { * set_current_state(TASK_UNINTERRUPTIBLE); - * if (do_i_need_to_sleep()) - * schedule(); + * if (!need_sleep) + * break; + * + * schedule(); + * } + * __set_current_state(TASK_RUNNING); + * + * If the caller does not need such serialisation (because, for instance, the + * condition test and condition change and wakeup are under the same lock) then + * use __set_current_state(). + * + * The above is typically ordered against the wakeup, which does: + * + * need_sleep = false; + * wake_up_state(p, TASK_UNINTERRUPTIBLE); + * + * Where wake_up_state() (and all other wakeup primitives) imply enough + * barriers to order the store of the variable against wakeup. * - * If the caller does not need such serialisation then use __set_current_state() + * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, + * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a + * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). + * + * This is obviously fine, since they both store the exact same value. + * + * Also see the comments of try_to_wake_up(). */ #define __set_current_state(state_value) \ do { current->state = (state_value); } while (0) @@ -520,7 +540,11 @@ static inline int get_dumpable(struct mm_struct *mm) /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ -#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +/* + * This one-shot flag is dropped due to necessity of changing exe once again + * on NFS restore + */ +//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ @@ -989,7 +1013,7 @@ enum cpu_idle_type { * already in a wake queue, the wakeup will happen soon and the second * waker can just skip it. * - * The WAKE_Q macro declares and initializes the list head. + * The DEFINE_WAKE_Q macro declares and initializes the list head. * wake_up_q() does NOT reinitialize the list; it's expected to be * called near the end of a function, where the fact that the queue is * not used again will be easy to see by inspection. @@ -1009,7 +1033,7 @@ struct wake_q_head { #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) -#define WAKE_Q(name) \ +#define DEFINE_WAKE_Q(name) \ struct wake_q_head name = { WAKE_Q_TAIL, &name.first } extern void wake_q_add(struct wake_q_head *head, @@ -1057,6 +1081,8 @@ static inline int cpu_numa_flags(void) } #endif +extern int arch_asym_cpu_priority(int cpu); + struct sched_domain_attr { int relax_domain_level; }; @@ -1627,7 +1653,10 @@ struct task_struct { int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - cputime_t utime, stime, utimescaled, stimescaled; + cputime_t utime, stime; +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME + cputime_t utimescaled, stimescaled; +#endif cputime_t gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN @@ -2220,34 +2249,38 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime); -extern void task_cputime_scaled(struct task_struct *t, - cputime_t *utimescaled, cputime_t *stimescaled); extern cputime_t task_gtime(struct task_struct *t); #else static inline void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) { - if (utime) - *utime = t->utime; - if (stime) - *stime = t->stime; + *utime = t->utime; + *stime = t->stime; } +static inline cputime_t task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif + +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME static inline void task_cputime_scaled(struct task_struct *t, cputime_t *utimescaled, cputime_t *stimescaled) { - if (utimescaled) - *utimescaled = t->utimescaled; - if (stimescaled) - *stimescaled = t->stimescaled; + *utimescaled = t->utimescaled; + *stimescaled = t->stimescaled; } - -static inline cputime_t task_gtime(struct task_struct *t) +#else +static inline void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, + cputime_t *stimescaled) { - return t->gtime; + task_cputime(t, utimescaled, stimescaled); } #endif + extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); @@ -2444,6 +2477,10 @@ static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } #endif /* CONFIG_NO_HZ_COMMON */ +#ifndef cpu_relax_yield +#define cpu_relax_yield() cpu_relax() +#endif + /* * Do not use outside of architecture code which knows its limitations. * @@ -2567,6 +2604,7 @@ extern void sched_autogroup_create_attach(struct task_struct *p); extern void sched_autogroup_detach(struct task_struct *p); extern void sched_autogroup_fork(struct signal_struct *sig); extern void sched_autogroup_exit(struct signal_struct *sig); +extern void sched_autogroup_exit_task(struct task_struct *p); #ifdef CONFIG_PROC_FS extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); @@ -2576,6 +2614,7 @@ static inline void sched_autogroup_create_attach(struct task_struct *p) { } static inline void sched_autogroup_detach(struct task_struct *p) { } static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } +static inline void sched_autogroup_exit_task(struct task_struct *p) { } #endif extern int yield_to(struct task_struct *p, bool preempt); @@ -3506,6 +3545,18 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ +/* + * In order to reduce various lock holder preemption latencies provide an + * interface to see if a vCPU is currently running or not. + * + * This allows us to terminate optimistic spin loops and block, analogous to + * the native optimistic spin heuristic of testing if the lock owner task is + * running or not. + */ +#ifndef vcpu_is_preempted +# define vcpu_is_preempted(cpu) false +#endif + extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 22db1e63707e..441145351301 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -36,7 +36,6 @@ extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_time_avg; -extern unsigned int sysctl_sched_shares_window; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, diff --git a/include/linux/seg6.h b/include/linux/seg6.h new file mode 100644 index 000000000000..7a66d2b4c5a6 --- /dev/null +++ b/include/linux/seg6.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_H +#define _LINUX_SEG6_H + +#include <uapi/linux/seg6.h> + +#endif diff --git a/include/linux/seg6_genl.h b/include/linux/seg6_genl.h new file mode 100644 index 000000000000..d6c3fb4f3734 --- /dev/null +++ b/include/linux/seg6_genl.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_GENL_H +#define _LINUX_SEG6_GENL_H + +#include <uapi/linux/seg6_genl.h> + +#endif diff --git a/include/linux/seg6_hmac.h b/include/linux/seg6_hmac.h new file mode 100644 index 000000000000..da437ebdc6cd --- /dev/null +++ b/include/linux/seg6_hmac.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_HMAC_H +#define _LINUX_SEG6_HMAC_H + +#include <uapi/linux/seg6_hmac.h> + +#endif diff --git a/include/linux/seg6_iptunnel.h b/include/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..5377cf6a5a02 --- /dev/null +++ b/include/linux/seg6_iptunnel.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_IPTUNNEL_H +#define _LINUX_SEG6_IPTUNNEL_H + +#include <uapi/linux/seg6_iptunnel.h> + +#endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 32810f279f8e..332e76756f54 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -645,8 +645,15 @@ struct sk_buff { struct rb_node rbnode; /* used in netem & tcp stack */ }; struct sock *sk; - struct net_device *dev; + union { + struct net_device *dev; + /* Some protocols might use this space to store information, + * while device pointer would be NULL. + * UDP receive path is one user. + */ + unsigned long dev_scratch; + }; /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you @@ -1087,7 +1094,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) } void __skb_get_hash(struct sk_buff *skb); -u32 __skb_get_hash_symmetric(struct sk_buff *skb); +u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); @@ -1799,11 +1806,11 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb) return skb->len - skb->data_len; } -static inline int skb_pagelen(const struct sk_buff *skb) +static inline unsigned int skb_pagelen(const struct sk_buff *skb) { - int i, len = 0; + unsigned int i, len = 0; - for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) + for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--) len += skb_frag_size(&skb_shinfo(skb)->frags[i]); return len + skb_headlen(skb); } @@ -1966,6 +1973,8 @@ static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; } +void skb_condense(struct sk_buff *skb); + /** * skb_headroom - bytes at buffer head * @skb: buffer to check @@ -3033,9 +3042,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index e302c447e057..129bc674dcf5 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -39,6 +39,7 @@ struct smc91x_platdata { unsigned long flags; unsigned char leda; unsigned char ledb; + bool pxa_u16_align4; /* PXA buggy u16 writes on 4*n+2 addresses */ }; #endif /* __SMC91X_H__ */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 705840e0438f..266dab9ad782 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -88,6 +88,9 @@ struct stmmac_mdio_bus_data { struct stmmac_dma_cfg { int pbl; + int txpbl; + int rxpbl; + bool pblx8; int fixed_burst; int mixed_burst; bool aal; @@ -135,8 +138,6 @@ struct plat_stmmacenet_data { void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); - void (*suspend)(struct platform_device *pdev, void *priv); - void (*resume)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; int has_gmac4; diff --git a/include/linux/swap.h b/include/linux/swap.h index a56523cefb9b..09b212d37f1d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -246,39 +246,7 @@ struct swap_info_struct { void *workingset_eviction(struct address_space *mapping, struct page *page); bool workingset_refault(void *shadow); void workingset_activation(struct page *page); -extern struct list_lru workingset_shadow_nodes; - -static inline unsigned int workingset_node_pages(struct radix_tree_node *node) -{ - return node->count & RADIX_TREE_COUNT_MASK; -} - -static inline void workingset_node_pages_inc(struct radix_tree_node *node) -{ - node->count++; -} - -static inline void workingset_node_pages_dec(struct radix_tree_node *node) -{ - VM_WARN_ON_ONCE(!workingset_node_pages(node)); - node->count--; -} - -static inline unsigned int workingset_node_shadows(struct radix_tree_node *node) -{ - return node->count >> RADIX_TREE_COUNT_SHIFT; -} - -static inline void workingset_node_shadows_inc(struct radix_tree_node *node) -{ - node->count += 1U << RADIX_TREE_COUNT_SHIFT; -} - -static inline void workingset_node_shadows_dec(struct radix_tree_node *node) -{ - VM_WARN_ON_ONCE(!workingset_node_shadows(node)); - node->count -= 1U << RADIX_TREE_COUNT_SHIFT; -} +void workingset_update_node(struct radix_tree_node *node, void *private); /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a17ae7b85218..fc5848dad7a4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -123,6 +123,7 @@ struct tcp_request_sock { u32 txhash; u32 rcv_isn; u32 snt_isn; + u32 ts_off; u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# @@ -176,8 +177,6 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ - struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ - u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ @@ -187,7 +186,6 @@ struct tcp_sock { u32 tsoffset; /* timestamp offset */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - unsigned long tsq_flags; /* Data for direct copy to user */ struct { @@ -213,8 +211,11 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ - unused:7; + u32 chrono_start; /* Start time in jiffies of a TCP chrono */ + u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ + u8 chrono_type:2, /* current chronograph type */ + rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + unused:5; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ @@ -362,7 +363,7 @@ struct tcp_sock { u32 *saved_syn; }; -enum tsq_flags { +enum tsq_enum { TSQ_THROTTLED, TSQ_QUEUED, TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ @@ -373,6 +374,15 @@ enum tsq_flags { */ }; +enum tsq_flags { + TSQF_THROTTLED = (1UL << TSQ_THROTTLED), + TSQF_QUEUED = (1UL << TSQ_QUEUED), + TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), +}; + static inline struct tcp_sock *tcp_sk(const struct sock *sk) { return (struct tcp_sock *)sk; @@ -427,4 +437,6 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp) tp->saved_syn = NULL; } +struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk); + #endif /* _LINUX_TCP_H */ diff --git a/include/linux/time.h b/include/linux/time.h index 4cea09d94208..23f0f5ce3090 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -172,8 +172,6 @@ extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); extern int do_getitimer(int which, struct itimerval *value); -extern unsigned int alarm_setitimer(unsigned int seconds); - extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags); struct tms; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 09168c52ab64..361f8bf1429d 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -249,6 +249,7 @@ static inline u64 ktime_get_raw_ns(void) extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); +extern u64 ktime_get_boot_fast_ns(void); /* * Timespec interfaces utilizing the ktime based ones diff --git a/include/linux/udp.h b/include/linux/udp.h index d1fd8cd39478..c0f530809d1f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -79,6 +79,9 @@ struct udp_sock { int (*gro_complete)(struct sock *sk, struct sk_buff *skb, int nhoff); + + /* This field is dirtied by udp_recvmsg() */ + int forward_deficit; }; static inline struct udp_sock *udp_sk(const struct sock *sk) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4a29c75b146e..0a294e950df8 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -27,6 +27,7 @@ #include <linux/errno.h> #include <linux/rbtree.h> #include <linux/types.h> +#include <linux/wait.h> struct vm_area_struct; struct mm_struct; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 3a375d07d0dc..00d232406f18 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -81,7 +81,8 @@ #define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) /* Driver flags */ -#define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ +#define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ +#define CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE 0x04 /* Avoid altsetting toggle during init */ #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 1c912f85e041..66204007d7ac 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -58,7 +58,7 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian) { - memset(hdr, 0, sizeof(*hdr)); + memset(hdr, 0, sizeof(*hdr)); /* no info leak */ if (skb_is_gso(skb)) { struct skb_shared_info *sinfo = skb_shinfo(skb); @@ -98,4 +98,4 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, return 0; } -#endif /* _LINUX_VIRTIO_BYTEORDER */ +#endif /* _LINUX_VIRTIO_NET_H */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3d9d786a943c..d68edffbf142 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -82,6 +82,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, const void *caller); extern void vfree(const void *addr); +extern void vfree_atomic(const void *addr); extern void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fc6e22186405..d4f16cf6281c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -119,18 +119,30 @@ struct delayed_work { int cpu; }; -/* - * A struct for workqueue attributes. This can be used to change - * attributes of an unbound workqueue. +/** + * struct workqueue_attrs - A struct for workqueue attributes. * - * Unlike other fields, ->no_numa isn't a property of a worker_pool. It - * only modifies how apply_workqueue_attrs() select pools and thus doesn't - * participate in pool hash calculations or equality comparisons. + * This can be used to change attributes of an unbound workqueue. */ struct workqueue_attrs { - int nice; /* nice level */ - cpumask_var_t cpumask; /* allowed CPUs */ - bool no_numa; /* disable NUMA affinity */ + /** + * @nice: nice level + */ + int nice; + + /** + * @cpumask: allowed CPUs + */ + cpumask_var_t cpumask; + + /** + * @no_numa: disable NUMA affinity + * + * Unlike other fields, ``no_numa`` isn't a property of a worker_pool. It + * only modifies how :c:func:`apply_workqueue_attrs` select pools and thus + * doesn't participate in pool hash calculations or equality comparisons. + */ + bool no_numa; }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) @@ -272,7 +284,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } /* * Workqueue flags and constants. For details, please refer to - * Documentation/workqueue.txt. + * Documentation/core-api/workqueue.rst. */ enum { WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ @@ -370,7 +382,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * @args...: args for @fmt * * Allocate a workqueue with the specified parameters. For detailed - * information on WQ_* flags, please refer to Documentation/workqueue.txt. + * information on WQ_* flags, please refer to + * Documentation/core-api/workqueue.rst. * * The __lock_name macro dance is to guarantee that single lock_class_key * doesn't end up with different namesm, which isn't allowed by lockdep. diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 2bb5deb0012e..7b0066814fa0 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -120,7 +120,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, struct ww_class *ww_class) { ctx->task = current; - ctx->stamp = atomic_long_inc_return(&ww_class->stamp); + ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp); ctx->acquired = 0; #ifdef CONFIG_DEBUG_MUTEXES ctx->ww_class = ww_class; diff --git a/include/net/act_api.h b/include/net/act_api.h index 82f3c912a5b1..1d716449209e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -36,13 +36,12 @@ struct tc_action { struct tcf_t tcfa_tm; struct gnet_stats_basic_packed tcfa_bstats; struct gnet_stats_queue tcfa_qstats; - struct gnet_stats_rate_est64 tcfa_rate_est; + struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; }; -#define tcf_act common.tcfa_act #define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -120,6 +119,8 @@ struct tc_action_ops { int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); + int (*get_dev)(const struct tc_action *a, struct net *net, + struct net_device **mirred_dev); }; struct tc_action_net { diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 0a1e21d7bce1..01487192f628 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -197,7 +197,7 @@ typedef struct { #define BDADDR_LE_PUBLIC 0x01 #define BDADDR_LE_RANDOM 0x02 -static inline bool bdaddr_type_is_valid(__u8 type) +static inline bool bdaddr_type_is_valid(u8 type) { switch (type) { case BDADDR_BREDR: @@ -209,7 +209,7 @@ static inline bool bdaddr_type_is_valid(__u8 type) return false; } -static inline bool bdaddr_type_is_le(__u8 type) +static inline bool bdaddr_type_is_le(u8 type) { switch (type) { case BDADDR_LE_PUBLIC: @@ -279,15 +279,16 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); /* Skb helpers */ struct l2cap_ctrl { - __u8 sframe:1, + u8 sframe:1, poll:1, final:1, fcs:1, sar:2, super:2; - __u16 reqseq; - __u16 txseq; - __u8 retries; + + u16 reqseq; + u16 txseq; + u8 retries; __le16 psm; bdaddr_t bdaddr; struct l2cap_chan *chan; @@ -303,7 +304,7 @@ typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, #define HCI_REQ_SKB BIT(1) struct hci_ctrl { - __u16 opcode; + u16 opcode; u8 req_flags; u8 req_event; union { @@ -313,10 +314,10 @@ struct hci_ctrl { }; struct bt_skb_cb { - __u8 pkt_type; - __u8 force_active; - __u16 expect; - __u8 incoming:1; + u8 pkt_type; + u8 force_active; + u16 expect; + u8 incoming:1; union { struct l2cap_ctrl l2cap; struct hci_ctrl hci; @@ -366,7 +367,7 @@ out: return NULL; } -int bt_to_errno(__u16 code); +int bt_to_errno(u16 code); void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f00bf667ec33..554671c81f4a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1018,7 +1018,7 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) } struct hci_dev *hci_dev_get(int index); -struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src); +struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type); struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); diff --git a/include/net/bonding.h b/include/net/bonding.h index f32f7ef8a23a..3c857778a6ca 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -681,7 +681,7 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) } /* exported from bond_main.c */ -extern int bond_net_id; +extern unsigned int bond_net_id; extern const struct bond_parm_tbl bond_lacp_tbl[]; extern const struct bond_parm_tbl xmit_hashtype_tbl[]; extern const struct bond_parm_tbl arp_validate_tbl[]; diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 2fbeb1313c0f..d73b849e29a6 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -58,10 +58,9 @@ static inline unsigned long busy_loop_end_time(void) return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll); } -static inline bool sk_can_busy_loop(struct sock *sk) +static inline bool sk_can_busy_loop(const struct sock *sk) { - return sk->sk_ll_usec && sk->sk_napi_id && - !need_resched() && !signal_pending(current); + return sk->sk_ll_usec && sk->sk_napi_id && !signal_pending(current); } @@ -81,11 +80,6 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, skb->napi_id = napi->napi_id; } -/* used in the protocol hanlder to propagate the napi_id to the socket */ -static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) -{ - sk->sk_napi_id = skb->napi_id; -} #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) @@ -108,10 +102,6 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, { } -static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) -{ -} - static inline bool busy_loop_timeout(unsigned long end_time) { return true; @@ -123,4 +113,23 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) } #endif /* CONFIG_NET_RX_BUSY_POLL */ + +/* used in the protocol hanlder to propagate the napi_id to the socket */ +static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + sk->sk_napi_id = skb->napi_id; +#endif +} + +/* variant used for unconnected sockets */ +static inline void sk_mark_napi_id_once(struct sock *sk, + const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + if (!sk->sk_napi_id) + sk->sk_napi_id = skb->napi_id; +#endif +} + #endif /* _LINUX_NET_BUSY_POLL_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 14b51d739c3b..814be4b4200c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -772,6 +772,30 @@ struct cfg80211_csa_settings { }; /** + * struct iface_combination_params - input parameters for interface combinations + * + * Used to pass interface combination parameters + * + * @num_different_channels: the number of different channels we want + * to use for verification + * @radar_detect: a bitmap where each bit corresponds to a channel + * width where radar detection is needed, as in the definition of + * &struct ieee80211_iface_combination.@radar_detect_widths + * @iftype_num: array with the number of interfaces of each interface + * type. The index is the interface type as specified in &enum + * nl80211_iftype. + * @new_beacon_int: set this to the beacon interval of a new interface + * that's not operating yet, if such is to be checked as part of + * the verification + */ +struct iface_combination_params { + int num_different_channels; + u8 radar_detect; + int iftype_num[NUM_NL80211_IFTYPES]; + u32 new_beacon_int; +}; + +/** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) * @STATION_PARAM_APPLY_CAPABILITY: apply new capability @@ -1761,9 +1785,11 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication * @key: WEP key for shared key authentication - * @sae_data: Non-IE data to use with SAE or %NULL. This starts with - * Authentication transaction sequence number field. - * @sae_data_len: Length of sae_data buffer in octets + * @auth_data: Fields and elements in Authentication frames. This contains + * the authentication frame body (non-IE and IE data), excluding the + * Authentication algorithm number, i.e., starting at the Authentication + * transaction sequence number field. + * @auth_data_len: Length of auth_data buffer in octets */ struct cfg80211_auth_request { struct cfg80211_bss *bss; @@ -1772,8 +1798,8 @@ struct cfg80211_auth_request { enum nl80211_auth_type auth_type; const u8 *key; u8 key_len, key_idx; - const u8 *sae_data; - size_t sae_data_len; + const u8 *auth_data; + size_t auth_data_len; }; /** @@ -1814,6 +1840,12 @@ enum cfg80211_assoc_req_flags { * @ht_capa_mask: The bits of ht_capa which are to be used. * @vht_capa: VHT capability override * @vht_capa_mask: VHT capability mask indicating which fields to use + * @fils_kek: FILS KEK for protecting (Re)Association Request/Response frame or + * %NULL if FILS is not used. + * @fils_kek_len: Length of fils_kek in octets + * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association + * Request/Response frame or %NULL if FILS is not used. This field starts + * with 16 octets of STA Nonce followed by 16 octets of AP Nonce. */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; @@ -1825,6 +1857,9 @@ struct cfg80211_assoc_request { struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa, vht_capa_mask; + const u8 *fils_kek; + size_t fils_kek_len; + const u8 *fils_nonces; }; /** @@ -2016,6 +2051,18 @@ struct cfg80211_connect_params { }; /** + * enum cfg80211_connect_params_changed - Connection parameters being updated + * + * This enum provides information of all connect parameters that + * have to be updated as part of update_connect_params() call. + * + * @UPDATE_ASSOC_IES: Indicates whether association request IEs are updated + */ +enum cfg80211_connect_params_changed { + UPDATE_ASSOC_IES = BIT(0), +}; + +/** * enum wiphy_params_flags - set_wiphy_params bitfield values * @WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed * @WIPHY_PARAM_RETRY_LONG: wiphy->retry_long has changed @@ -2536,9 +2583,18 @@ struct cfg80211_nan_func { * cases, the result of roaming is indicated with a call to * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) - * @disconnect: Disconnect from the BSS/ESS. Once done, call - * cfg80211_disconnected(). + * @update_connect_params: Update the connect parameters while connected to a + * BSS. The updated parameters can be used by driver/firmware for + * subsequent BSS selection (roaming) decisions and to form the + * Authentication/(Re)Association Request frames. This call does not + * request an immediate disassociation or reassociation with the current + * BSS, i.e., this impacts only subsequent (re)associations. The bits in + * changed are defined in &enum cfg80211_connect_params_changed. * (invoked with the wireless_dev mutex held) + * @disconnect: Disconnect from the BSS/ESS or stop connection attempts if + * connection is in progress. Once done, call cfg80211_disconnected() in + * case connection was already established (invoked with the + * wireless_dev mutex held), otherwise call cfg80211_connect_timeout(). * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call * cfg80211_ibss_joined(), also call that function when changing BSSID due @@ -2706,6 +2762,8 @@ struct cfg80211_nan_func { * @nan_change_conf: changes NAN configuration. The changed parameters must * be specified in @changes (using &enum cfg80211_nan_conf_changes); * All other parameters must be ignored. + * + * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2820,6 +2878,10 @@ struct cfg80211_ops { int (*connect)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme); + int (*update_connect_params)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_connect_params *sme, + u32 changed); int (*disconnect)(struct wiphy *wiphy, struct net_device *dev, u16 reason_code); @@ -2982,6 +3044,10 @@ struct cfg80211_ops { struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes); + + int (*set_multicast_to_unicast)(struct wiphy *wiphy, + struct net_device *dev, + const bool enabled); }; /* @@ -3080,6 +3146,12 @@ struct ieee80211_iface_limit { * only in special cases. * @radar_detect_widths: bitmap of channel widths supported for radar detection * @radar_detect_regions: bitmap of regions supported for radar detection + * @beacon_int_min_gcd: This interface combination supports different + * beacon intervals. + * = 0 - all beacon intervals for different interface must be same. + * > 0 - any beacon interval for the interface part of this combination AND + * *GCD* of all beacon intervals from beaconing interfaces of this + * combination must be greater or equal to this value. * * With this structure the driver can describe which interface * combinations it supports concurrently. @@ -3145,6 +3217,7 @@ struct ieee80211_iface_combination { bool beacon_int_infra_match; u8 radar_detect_widths; u8 radar_detect_regions; + u32 beacon_int_min_gcd; }; struct ieee80211_txrx_stypes { @@ -3752,8 +3825,8 @@ struct cfg80211_cached_keys; * @beacon_interval: beacon interval used on this device for transmitting * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL - * @p2p_started: true if this is a P2P Device that has been started - * @nan_started: true if this is a NAN interface that has been started + * @is_running: true if this is a non-netdev device that has been started, e.g. + * the P2P Device. * @cac_started: true if DFS channel availability check has been started * @cac_start_time: timestamp (jiffies) when the dfs state was entered. * @cac_time_ms: CAC time in ms @@ -3785,7 +3858,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, p2p_started, nan_started; + bool use_4addr, is_running; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -3842,6 +3915,13 @@ static inline u8 *wdev_address(struct wireless_dev *wdev) return wdev->address; } +static inline bool wdev_running(struct wireless_dev *wdev) +{ + if (wdev->netdev) + return netif_running(wdev->netdev); + return wdev->is_running; +} + /** * wdev_priv - return wiphy priv from wireless_dev * @@ -4163,6 +4243,27 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) } /** + * cfg80211_find_ext_ie - find information element with EID Extension in data + * + * @ext_eid: element ID Extension + * @ies: data consisting of IEs + * @len: length of data + * + * Return: %NULL if the extended element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data), or a pointer to the first byte of the requested + * element, that is the byte containing the element ID. + * + * Note: There are no checks on the element length other than + * having to fit into the given data. + */ +static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) +{ + return cfg80211_find_ie_match(WLAN_EID_EXTENSION, ies, len, + &ext_eid, 1, 2); +} + +/** * cfg80211_find_vendor_ie - find vendor specific information element in data * * @oui: vendor OUI @@ -4562,7 +4663,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * moves to cfg80211 in this call * @buf: authentication frame (header + body) * @len: length of the frame data - * @uapsd_queues: bitmap of ACs configured to uapsd. -1 if n/a. + * @uapsd_queues: bitmap of queues configured for uapsd. Same format + * as the AC bitmap in the QoS info field * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). @@ -4584,6 +4686,17 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss); /** + * cfg80211_abandon_assoc - notify cfg80211 of abandoned association attempt + * @dev: network device + * @bss: The BSS entry with which association was abandoned. + * + * Call this whenever - for reasons reported through other API, like deauth RX, + * an association attempt was abandoned. + * This function may sleep. The caller must hold the corresponding wdev's mutex. + */ +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss); + +/** * cfg80211_tx_mlme_mgmt - notification of transmitted deauth/disassoc frame * @dev: network device * @buf: 802.11 frame (header + body) @@ -5598,36 +5711,20 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy); * cfg80211_check_combinations - check interface combinations * * @wiphy: the wiphy - * @num_different_channels: the number of different channels we want - * to use for verification - * @radar_detect: a bitmap where each bit corresponds to a channel - * width where radar detection is needed, as in the definition of - * &struct ieee80211_iface_combination.@radar_detect_widths - * @iftype_num: array with the numbers of interfaces of each interface - * type. The index is the interface type as specified in &enum - * nl80211_iftype. + * @params: the interface combinations parameter * * This function can be called by the driver to check whether a * combination of interfaces and their types are allowed according to * the interface combinations. */ int cfg80211_check_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES]); + struct iface_combination_params *params); /** * cfg80211_iter_combinations - iterate over matching combinations * * @wiphy: the wiphy - * @num_different_channels: the number of different channels we want - * to use for verification - * @radar_detect: a bitmap where each bit corresponds to a channel - * width where radar detection is needed, as in the definition of - * &struct ieee80211_iface_combination.@radar_detect_widths - * @iftype_num: array with the numbers of interfaces of each interface - * type. The index is the interface type as specified in &enum - * nl80211_iftype. + * @params: the interface combinations parameter * @iter: function to call for each matching combination * @data: pointer to pass to iter function * @@ -5636,9 +5733,7 @@ int cfg80211_check_combinations(struct wiphy *wiphy, * purposes. */ int cfg80211_iter_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES], + struct iface_combination_params *params, void (*iter)(const struct ieee80211_iface_combination *c, void *data), void *data); diff --git a/include/net/devlink.h b/include/net/devlink.h index 211bd3c37028..d29e5fc82582 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -92,6 +92,8 @@ struct devlink_ops { int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); + int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); + int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 6965c8f68ade..701fc814d0af 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -115,6 +115,7 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be16 flags, __be64 tunnel_id, int md_size) @@ -127,7 +128,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, - 0, 0, 0, tunnel_id, flags); + 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } @@ -139,12 +140,13 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, - flags, tunnel_id, md_size); + 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be32 label, __be16 flags, __be64 tunnel_id, @@ -162,7 +164,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad info->key.tun_flags = flags; info->key.tun_id = tunnel_id; info->key.tp_src = 0; - info->key.tp_dst = 0; + info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; @@ -183,7 +185,7 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, - ip6_flowlabel(ip6h), flags, tunnel_id, + 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 456e4a6006ab..8dbfdf728cd8 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,11 @@ #include <net/flow.h> #include <net/rtnetlink.h> +struct fib_kuid_range { + kuid_t start; + kuid_t end; +}; + struct fib_rule { struct list_head list; int iifindex; @@ -30,6 +35,7 @@ struct fib_rule { int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + struct fib_kuid_range uid_range; struct rcu_head rcu; }; @@ -92,7 +98,8 @@ struct fib_rules_ops { [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 } + [FRA_L3MDEV] = { .type = NLA_U8 }, \ + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 035aa7716967..6984f1913dc1 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,6 +11,7 @@ #include <linux/in6.h> #include <linux/atomic.h> #include <net/flow_dissector.h> +#include <linux/uidgid.h> /* * ifindex generation is per-net namespace, and loopback is @@ -37,6 +38,7 @@ struct flowi_common { #define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; + kuid_t flowic_uid; }; union flowi_uli { @@ -74,6 +76,7 @@ struct flowi4 { #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key +#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -93,7 +96,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; @@ -104,6 +108,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; fl4->flowi4_tun_key.tun_id = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -131,6 +136,7 @@ struct flowi6 { #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid #define flowi6_tun_key __fl_common.flowic_tun_key +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; /* Note: flowi6_tos is encoded in flowlabel, too. */ @@ -176,6 +182,7 @@ struct flowi { #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid #define flowi_tun_key u.__fl_common.flowic_tun_key +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) @@ -239,6 +246,7 @@ struct flow_cache_object *flow_cache_lookup(struct net *net, void *ctx); int flow_cache_init(struct net *net); void flow_cache_fini(struct net *net); +void flow_cache_hp_init(void); void flow_cache_flush(struct net *net); void flow_cache_flush_deferred(struct net *net); diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d9534927d93b..d896a33e00d4 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -104,6 +104,22 @@ struct flow_dissector_key_ports { }; }; +/** + * flow_dissector_key_icmp: + * @ports: type and code of ICMP header + * icmp: ICMP type (high) and code (low) + * type: ICMP type + * code: ICMP code + */ +struct flow_dissector_key_icmp { + union { + __be16 icmp; + struct { + u8 type; + u8 code; + }; + }; +}; /** * struct flow_dissector_key_eth_addrs: @@ -122,12 +138,18 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ + FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ + FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/flowcache.h b/include/net/flowcache.h index c8f665ec6e0d..9caf3bfc8d2d 100644 --- a/include/net/flowcache.h +++ b/include/net/flowcache.h @@ -17,7 +17,7 @@ struct flow_cache_percpu { struct flow_cache { u32 hash_shift; struct flow_cache_percpu __percpu *percpu; - struct notifier_block hotcpu_notifier; + struct hlist_node node; int low_watermark; int high_watermark; struct timer_list rnd_timer; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 231e121cc7d9..8b7aa370e7a4 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -11,6 +11,8 @@ struct gnet_stats_basic_cpu { struct u64_stats_sync syncp; }; +struct net_rate_estimator; + struct gnet_dump { spinlock_t * lock; struct sk_buff * skb; @@ -42,8 +44,7 @@ void __gnet_stats_copy_basic(const seqcount_t *running, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, - const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est64 *r); + struct net_rate_estimator __rcu **ptr); int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue __percpu *cpu_q, struct gnet_stats_queue *q, __u32 qlen); @@ -53,16 +54,16 @@ int gnet_stats_finish_copy(struct gnet_dump *d); int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, - struct gnet_stats_rate_est64 *rate_est, + struct net_rate_estimator __rcu **rate_est, spinlock_t *stats_lock, seqcount_t *running, struct nlattr *opt); -void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est64 *rate_est); +void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, - struct gnet_stats_rate_est64 *rate_est, + struct net_rate_estimator __rcu **ptr, spinlock_t *stats_lock, seqcount_t *running, struct nlattr *opt); -bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est64 *rate_est); +bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); +bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, + struct gnet_stats_rate_est64 *sample); #endif diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 8d4608ce8716..a34275be3600 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -20,7 +20,7 @@ struct genl_info; /** * struct genl_family - generic netlink family - * @id: protocol family idenfitier + * @id: protocol family identifier (private) * @hdrsize: length of user specific header in bytes * @name: name of family * @version: protocol version @@ -39,16 +39,16 @@ struct genl_info; * Note that unbind() will not be called symmetrically if the * generic netlink family is removed while there are still open * sockets. - * @attrbuf: buffer to store parsed attributes - * @family_list: family list - * @mcgrps: multicast groups used by this family (private) - * @n_mcgrps: number of multicast groups (private) + * @attrbuf: buffer to store parsed attributes (private) + * @mcgrps: multicast groups used by this family + * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family - * @ops: the operations supported by this family (private) - * @n_ops: number of operations supported by this family (private) + * (private) + * @ops: the operations supported by this family + * @n_ops: number of operations supported by this family */ struct genl_family { - unsigned int id; + int id; /* private */ unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; @@ -64,15 +64,16 @@ struct genl_family { int (*mcast_bind)(struct net *net, int group); void (*mcast_unbind)(struct net *net, int group); struct nlattr ** attrbuf; /* private */ - const struct genl_ops * ops; /* private */ - const struct genl_multicast_group *mcgrps; /* private */ - unsigned int n_ops; /* private */ - unsigned int n_mcgrps; /* private */ + const struct genl_ops * ops; + const struct genl_multicast_group *mcgrps; + unsigned int n_ops; + unsigned int n_mcgrps; unsigned int mcgrp_offset; /* private */ - struct list_head family_list; /* private */ struct module *module; }; +struct nlattr **genl_family_attrbuf(const struct genl_family *family); + /** * struct genl_info - receiving information * @snd_seq: sending sequence number @@ -130,64 +131,13 @@ struct genl_ops { u8 flags; }; -int __genl_register_family(struct genl_family *family); - -static inline int genl_register_family(struct genl_family *family) -{ - family->module = THIS_MODULE; - return __genl_register_family(family); -} - -/** - * genl_register_family_with_ops - register a generic netlink family with ops - * @family: generic netlink family - * @ops: operations to be registered - * @n_ops: number of elements to register - * - * Registers the specified family and operations from the specified table. - * Only one family may be registered with the same family name or identifier. - * - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. - * - * Either a doit or dumpit callback must be specified for every registered - * operation or the function will fail. Only one operation structure per - * command identifier may be registered. - * - * See include/net/genetlink.h for more documenation on the operations - * structure. - * - * Return 0 on success or a negative error code. - */ -static inline int -_genl_register_family_with_ops_grps(struct genl_family *family, - const struct genl_ops *ops, size_t n_ops, - const struct genl_multicast_group *mcgrps, - size_t n_mcgrps) -{ - family->module = THIS_MODULE; - family->ops = ops; - family->n_ops = n_ops; - family->mcgrps = mcgrps; - family->n_mcgrps = n_mcgrps; - return __genl_register_family(family); -} - -#define genl_register_family_with_ops(family, ops) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - NULL, 0) -#define genl_register_family_with_ops_groups(family, ops, grps) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - (grps), ARRAY_SIZE(grps)) - -int genl_unregister_family(struct genl_family *family); -void genl_notify(struct genl_family *family, struct sk_buff *skb, +int genl_register_family(struct genl_family *family); +int genl_unregister_family(const struct genl_family *family); +void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, - struct genl_family *family, int flags, u8 cmd); + const struct genl_family *family, int flags, u8 cmd); /** * genlmsg_nlhdr - Obtain netlink header from user specified header @@ -196,8 +146,8 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, * * Returns pointer to netlink header. */ -static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr, - struct genl_family *family) +static inline struct nlmsghdr * +genlmsg_nlhdr(void *user_hdr, const struct genl_family *family) { return (struct nlmsghdr *)((char *)user_hdr - family->hdrsize - @@ -233,7 +183,7 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh, */ static inline void genl_dump_check_consistent(struct netlink_callback *cb, void *user_hdr, - struct genl_family *family) + const struct genl_family *family) { nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr, family)); } @@ -250,7 +200,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb, */ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_info *info, - struct genl_family *family, + const struct genl_family *family, int flags, u8 cmd) { return genlmsg_put(skb, info->snd_portid, info->snd_seq, family, @@ -287,7 +237,7 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) * @group: offset of multicast group in groups array * @flags: allocation flags */ -static inline int genlmsg_multicast_netns(struct genl_family *family, +static inline int genlmsg_multicast_netns(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { @@ -305,7 +255,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family, * @group: offset of multicast group in groups array * @flags: allocation flags */ -static inline int genlmsg_multicast(struct genl_family *family, +static inline int genlmsg_multicast(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { @@ -323,7 +273,7 @@ static inline int genlmsg_multicast(struct genl_family *family, * * This function must hold the RTNL or rcu_read_lock(). */ -int genlmsg_multicast_allns(struct genl_family *family, +int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags); @@ -407,8 +357,9 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ -static inline int genl_set_err(struct genl_family *family, struct net *net, - u32 portid, u32 group, int code) +static inline int genl_set_err(const struct genl_family *family, + struct net *net, u32 portid, + u32 group, int code) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; @@ -416,7 +367,7 @@ static inline int genl_set_err(struct genl_family *family, struct net *net, return netlink_set_err(net->genl_sock, portid, group, code); } -static inline int genl_has_listeners(struct genl_family *family, +static inline int genl_has_listeners(const struct genl_family *family, struct net *net, unsigned int group) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index d15214d673b2..2a1abbf8da74 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + + set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); + netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); napi_enable(&cell->napi); } diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index ba07b9d8ed63..d0e7e3f8e67a 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -333,9 +333,9 @@ enum ieee80211_radiotap_type { #define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0010 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0010 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0030 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0030 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 #define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index b0576cb2ab25..0fa4c324b713 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -55,6 +55,7 @@ struct inet6_ifaddr { __u8 stable_privacy_retry; __u16 scope; + __u64 dad_nonce; unsigned long cstamp; /* created timestamp */ unsigned long tstamp; /* updated timestamp */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 197a30d221e9..146054ceea8e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -289,11 +289,6 @@ static inline int inet_csk_reqsk_queue_len(const struct sock *sk) return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); } -static inline int inet_csk_reqsk_queue_young(const struct sock *sk) -{ - return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); -} - static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 236a81034fef..c9cff977a7fb 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -228,6 +228,7 @@ struct inet_sock { #define IP_CMSG_PASSSEC BIT(5) #define IP_CMSG_ORIGDSTADDR BIT(6) #define IP_CMSG_CHECKSUM BIT(7) +#define IP_CMSG_RECVFRAGSIZE BIT(8) /** * sk_to_full_sk - Access to a full socket diff --git a/include/net/ip.h b/include/net/ip.h index d3a107850a41..ab6761a7c883 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -178,6 +178,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -577,7 +578,8 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -599,7 +601,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { - ip_cmsg_recv_offset(msg, skb, 0, 0); + ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } bool icmp_global_allow(void); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f83e78d071a3..9dc2c182a263 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -140,9 +140,10 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, - u32 mark); + u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, u32 mark); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b9314b48e39f..5f376af377c7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -221,7 +221,8 @@ enum fib_event_type { FIB_EVENT_RULE_DEL, }; -int register_fib_notifier(struct notifier_block *nb); +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)); int unregister_fib_notifier(struct notifier_block *nb); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); @@ -243,6 +244,7 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); +void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -396,6 +398,11 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) void free_fib_info(struct fib_info *fi); +static inline void fib_info_hold(struct fib_info *fi) +{ + atomic_inc(&fi->fib_clntref); +} + static inline void fib_info_put(struct fib_info *fi) { if (atomic_dec_and_test(&fi->fib_clntref)) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 59557c07904b..e893fe43dd13 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -129,7 +129,7 @@ struct ip_tunnel { #endif struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ - int ip_tnl_net_id; + unsigned int ip_tnl_net_id; struct gro_cells gro_cells; bool collect_md; bool ignore_df; @@ -248,7 +248,7 @@ void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_get_iflink(const struct net_device *dev); -int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, +int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); @@ -275,7 +275,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); -void ip_tunnel_setup(struct net_device *dev, int net_id); +void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8fed1cd78658..487e57391664 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -932,7 +932,8 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); */ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto, struct in6_addr **daddr_p); + u8 *proto, struct in6_addr **daddr_p, + struct in6_addr *saddr); void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto); @@ -970,6 +971,8 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, + int addr_len); int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index ea3f80f58fd6..d4c1c75b8862 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -24,11 +24,11 @@ enum { struct lwtunnel_state { __u16 type; __u16 flags; + __u16 headroom; atomic_t refcnt; int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); - int len; - __u16 headroom; + struct rcu_head rcu; __u8 data[0]; }; @@ -36,6 +36,7 @@ struct lwtunnel_encap_ops { int (*build_state)(struct net_device *dev, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **ts); + void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); int (*fill_encap)(struct sk_buff *skb, @@ -46,10 +47,7 @@ struct lwtunnel_encap_ops { }; #ifdef CONFIG_LWTUNNEL -static inline void lwtstate_free(struct lwtunnel_state *lws) -{ - kfree(lws); -} +void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) @@ -96,7 +94,8 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { - if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu) + if ((lwtunnel_xmit_redirect(lwtstate) || + lwtunnel_output_redirect(lwtstate)) && lwtstate->headroom < mtu) return lwtstate->headroom; return 0; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e2dba93e374f..5345d358a510 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1442,7 +1442,7 @@ enum ieee80211_vif_flags { struct ieee80211_vif { enum nl80211_iftype type; struct ieee80211_bss_conf bss_conf; - u8 addr[ETH_ALEN]; + u8 addr[ETH_ALEN] __aligned(2); bool p2p; bool csa_active; bool mu_mimo_owner; @@ -1749,7 +1749,8 @@ struct ieee80211_sta_rates { * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*), size is determined in hw information. * @uapsd_queues: bitmap of queues configured for uapsd. Only valid - * if wme is supported. + * if wme is supported. The bits order is like in + * IEEE80211_WMM_IE_STA_QOSINFO_AC_*. * @max_sp: max Service Period. Only valid if wme is supported. * @bandwidth: current bandwidth the station can receive with * @rx_nss: in HT/VHT, the maximum number of spatial streams the @@ -2029,6 +2030,10 @@ struct ieee80211_txq { * drivers, mac80211 packet loss mechanism will not be triggered and driver * is completely depending on firmware event for station kickout. * + * @IEEE80211_HW_SUPPORTS_TX_FRAG: Hardware does fragmentation by itself. + * The stack will not do fragmentation. + * The callback for @set_frag_threshold should be set as well. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2070,6 +2075,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TX_AMSDU, IEEE80211_HW_TX_FRAG_LIST, IEEE80211_HW_REPORTS_LOW_ACK, + IEEE80211_HW_SUPPORTS_TX_FRAG, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3098,8 +3104,9 @@ enum ieee80211_reconfig_type { * The callback must be atomic. * * @set_frag_threshold: Configuration of fragmentation threshold. Assign this - * if the device does fragmentation by itself; if this callback is - * implemented then the stack will not do fragmentation. + * if the device does fragmentation by itself. Note that to prevent the + * stack from doing fragmentation IEEE80211_HW_SUPPORTS_TX_FRAG + * should be set as well. * The callback can sleep. * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) @@ -4092,6 +4099,10 @@ void ieee80211_sta_pspoll(struct ieee80211_sta *sta); * This must be used in conjunction with ieee80211_sta_ps_transition() * and possibly ieee80211_sta_pspoll(); calls to all three must be * serialized. + * %IEEE80211_NUM_TIDS can be passed as the tid if the tid is unknown. + * In this case, mac80211 will not check that this tid maps to an AC + * that is trigger enabled and assume that the caller did the proper + * checks. */ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); diff --git a/include/net/ndisc.h b/include/net/ndisc.h index be1fe2283254..d562a2fe4860 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -31,6 +31,7 @@ enum { ND_OPT_PREFIX_INFO = 3, /* RFC2461 */ ND_OPT_REDIRECT_HDR = 4, /* RFC2461 */ ND_OPT_MTU = 5, /* RFC2461 */ + ND_OPT_NONCE = 14, /* RFC7527 */ __ND_OPT_ARRAY_MAX, ND_OPT_ROUTE_INFO = 24, /* RFC4191 */ ND_OPT_RDNSS = 25, /* RFC5006 */ @@ -121,6 +122,7 @@ struct ndisc_options { #define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] #define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] #define nd_opts_mtu nd_opt_array[ND_OPT_MTU] +#define nd_opts_nonce nd_opt_array[ND_OPT_NONCE] #define nd_802154_opts_src_lladdr nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR] #define nd_802154_opts_tgt_lladdr nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR] @@ -398,7 +400,8 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr); + const struct in6_addr *daddr, const struct in6_addr *saddr, + u64 nonce); void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fc4f757107df..af8fe8a909dc 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -170,7 +170,7 @@ static inline struct net *copy_net_ns(unsigned long flags, extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); -struct net *get_net_ns_by_fd(int pid); +struct net *get_net_ns_by_fd(int fd); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); @@ -291,7 +291,7 @@ struct pernet_operations { int (*init)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); - int *id; + unsigned int *id; size_t size; }; diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 981c327374da..919e4e8af327 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -15,6 +15,15 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; +#ifdef CONFIG_NF_CT_PROTO_DCCP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; +#endif int nf_conntrack_ipv4_compat_init(void); void nf_conntrack_ipv4_compat_fini(void); diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h index f01ef208dff6..db405f70e538 100644 --- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -1,6 +1,7 @@ #ifndef _NF_DEFRAG_IPV4_H #define _NF_DEFRAG_IPV4_H -void nf_defrag_ipv4_enable(void); +struct net; +int nf_defrag_ipv4_enable(struct net *); #endif /* _NF_DEFRAG_IPV4_H */ diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index a4c993685795..eaea968f8657 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -6,6 +6,15 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; +#ifdef CONFIG_NF_CT_PROTO_DCCP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; +#endif #include <linux/sysctl.h> extern struct ctl_table nf_ct_ipv6_sysctl_table[]; diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index ddf162f7966f..7664efe37974 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -1,7 +1,8 @@ #ifndef _NF_DEFRAG_IPV6_H #define _NF_DEFRAG_IPV6_H -void nf_defrag_ipv6_enable(void); +struct net; +int nf_defrag_ipv6_enable(struct net *); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 50418052a520..5916aa9ab3f0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -100,6 +100,9 @@ struct nf_conn { possible_net_t ct_net; +#if IS_ENABLED(CONFIG_NF_NAT) + struct rhlist_head nat_bysource; +#endif /* all members below initialized via memset */ u8 __nfct_init_offset[0]; @@ -117,9 +120,6 @@ struct nf_conn { /* Extensions */ struct nf_ct_ext *ext; -#if IS_ENABLED(CONFIG_NF_NAT) - struct rhash_head nat_bysource; -#endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; @@ -181,6 +181,10 @@ static inline void nf_ct_put(struct nf_conn *ct) int nf_ct_l3proto_try_module_get(unsigned short l3proto); void nf_ct_l3proto_module_put(unsigned short l3proto); +/* load module; enable/disable conntrack in this namespace */ +int nf_ct_netns_get(struct net *net, u8 nfproto); +void nf_ct_netns_put(struct net *net, u8 nfproto); + /* * Allocate a hashtable of hlist_head (if nulls == 0), * or hlist_nulls_head (if nulls == 1) diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 8992e4229da9..e01559b4d781 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -52,6 +52,10 @@ struct nf_conntrack_l3proto { int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); + /* Called when netns wants to use connection tracking */ + int (*net_ns_get)(struct net *); + void (*net_ns_put)(struct net *); + /* * Calculate size of tuple nlattr */ @@ -63,18 +67,24 @@ struct nf_conntrack_l3proto { size_t nla_size; - /* Init l3proto pernet data */ - int (*init_net)(struct net *net); - /* Module (if any) which this is connected to. */ struct module *me; }; extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; +#ifdef CONFIG_SYSCTL /* Protocol pernet registration. */ int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto); +#else +static inline int nf_ct_l3proto_pernet_register(struct net *n, + struct nf_conntrack_l3proto *p) +{ + return 0; +} +#endif + void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index de629f1520df..e7b836590f0b 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -98,7 +98,7 @@ struct nf_conntrack_l4proto { const struct nla_policy *nla_policy; } ctnl_timeout; #endif - int *net_id; + unsigned int *net_id; /* Init l4proto pernet data */ int (*init_net)(struct net *net, u_int16_t proto); @@ -125,14 +125,24 @@ struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ +int nf_ct_l4proto_pernet_register_one(struct net *net, + struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_pernet_unregister_one(struct net *net, + struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *proto); + struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *proto); + struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); /* Protocol global registration. */ -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index e6937318546c..b0ca402c1f72 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -54,7 +54,7 @@ struct synproxy_net { struct synproxy_stats __percpu *stats; }; -extern int synproxy_net_id; +extern unsigned int synproxy_net_id; static inline struct synproxy_net *synproxy_pernet(struct net *net) { return net_generic(net, synproxy_net_id); diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index 397dcae349f9..3e919356bedf 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h @@ -2,5 +2,6 @@ #define _NF_DUP_NETDEV_H_ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); +void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); #endif diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 309cd267be4f..450f87f95415 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -109,5 +109,12 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix); +void nf_log_l2packet(struct net *net, u_int8_t pf, + __be16 protocol, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *prefix); #endif /* _NF_LOG_H */ diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 12f4cc841b6e..3923150f2a1e 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -54,6 +54,15 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_udp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6; extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; +#ifdef CONFIG_NF_NAT_PROTO_DCCP +extern const struct nf_nat_l4proto nf_nat_l4proto_dccp; +#endif +#ifdef CONFIG_NF_NAT_PROTO_SCTP +extern const struct nf_nat_l4proto nf_nat_l4proto_sctp; +#endif +#ifdef CONFIG_NF_NAT_PROTO_UDPLITE +extern const struct nf_nat_l4proto nf_nat_l4proto_udplite; +#endif bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 2280cfe86c56..09948d10e38e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -12,6 +12,7 @@ struct nf_queue_entry { unsigned int id; struct nf_hook_state state; + struct nf_hook_entry *hook; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h new file mode 100644 index 000000000000..f2fc39c97d43 --- /dev/null +++ b/include/net/netfilter/nf_socket.h @@ -0,0 +1,27 @@ +#ifndef _NF_SOCK_H_ +#define _NF_SOCK_H_ + +struct net_device; +struct sk_buff; +struct sock; +struct net; + +static inline bool nf_sk_is_transparent(struct sock *sk) +{ + switch (sk->sk_state) { + case TCP_TIME_WAIT: + return inet_twsk(sk)->tw_transparent; + case TCP_NEW_SYN_RECV: + return inet_rsk(inet_reqsk(sk))->no_srccheck; + default: + return inet_sk(sk)->transparent; + } +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +#endif diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d79d1e9b9546..924325c46aab 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -14,27 +14,43 @@ struct nft_pktinfo { struct sk_buff *skb; - struct net *net; - const struct net_device *in; - const struct net_device *out; - u8 pf; - u8 hook; bool tprot_set; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; }; +static inline struct net *nft_net(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->net; +} + +static inline unsigned int nft_hook(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->hook; +} + +static inline u8 nft_pf(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->pf; +} + +static inline const struct net_device *nft_in(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->in; +} + +static inline const struct net_device *nft_out(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->out; +} + static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { pkt->skb = skb; - pkt->net = pkt->xt.net = state->net; - pkt->in = pkt->xt.in = state->in; - pkt->out = pkt->xt.out = state->out; - pkt->hook = pkt->xt.hooknum = state->hook; - pkt->pf = pkt->xt.family = state->pf; + pkt->xt.state = state; } static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, @@ -243,7 +259,8 @@ struct nft_expr; * @lookup: look up an element within the set * @insert: insert new element into set * @activate: activate new element in the next generation - * @deactivate: deactivate element in the next generation + * @deactivate: lookup for element and deactivate it in the next generation + * @deactivate_one: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data @@ -278,6 +295,9 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); + bool (*deactivate_one)(const struct net *net, + const struct nft_set *set, + void *priv); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, @@ -310,10 +330,11 @@ void nft_unregister_set(struct nft_set_ops *ops); * @name: name of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) * @dtype: data type (verdict or numeric type defined by userspace) + * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal - * @timeout: default timeout value in msecs + * @timeout: default timeout value in jiffies * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) * @udlen: user data length @@ -331,6 +352,7 @@ struct nft_set { char name[NFT_SET_MAXNAMELEN]; u32 ktype; u32 dtype; + u32 objtype; u32 size; atomic_t nelems; u32 ndeact; @@ -400,6 +422,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_EXPR: expression assiociated with the element + * @NFT_SET_EXT_OBJREF: stateful object reference associated with element * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { @@ -410,6 +433,7 @@ enum nft_set_extensions { NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_USERDATA, NFT_SET_EXT_EXPR, + NFT_SET_EXT_OBJREF, NFT_SET_EXT_NUM }; @@ -538,6 +562,11 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, return elem + set->ops->elemsize; } +static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_OBJREF); +} + void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, @@ -859,6 +888,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @list: used internally * @chains: chains in the table * @sets: sets in the table + * @objects: stateful objects in the table * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) @@ -869,6 +899,7 @@ struct nft_table { struct list_head list; struct list_head chains; struct list_head sets; + struct list_head objects; u64 hgenerator; u32 use; u16 flags:14, @@ -919,6 +950,80 @@ int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v); /** + * struct nft_object - nf_tables stateful object + * + * @list: table stateful object list node + * @table: table this object belongs to + * @type: pointer to object type + * @data: pointer to object data + * @name: name of this stateful object + * @genmask: generation mask + * @use: number of references to this stateful object + * @data: object data, layout depends on type + */ +struct nft_object { + struct list_head list; + char name[NFT_OBJ_MAXNAMELEN]; + struct nft_table *table; + u32 genmask:2, + use:30; + /* runtime data below here */ + const struct nft_object_type *type ____cacheline_aligned; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_obj_data(const struct nft_object *obj) +{ + return (void *)obj->data; +} + +#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) + +struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, + const struct nlattr *nla, u32 objtype, + u8 genmask); + +int nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, + int event, int family, int report, gfp_t gfp); + +/** + * struct nft_object_type - stateful object type + * + * @eval: stateful object evaluation function + * @list: list node in list of object types + * @type: stateful object numeric type + * @size: stateful object size + * @owner: module owner + * @maxattr: maximum netlink attribute + * @policy: netlink attribute policy + * @init: initialize object from netlink attributes + * @destroy: release existing stateful object + * @dump: netlink dump stateful object + */ +struct nft_object_type { + void (*eval)(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + struct list_head list; + u32 type; + unsigned int size; + unsigned int maxattr; + struct module *owner; + const struct nla_policy *policy; + int (*init)(const struct nlattr * const tb[], + struct nft_object *obj); + void (*destroy)(struct nft_object *obj); + int (*dump)(struct sk_buff *skb, + struct nft_object *obj, + bool reset); +}; + +int nft_register_obj(struct nft_object_type *obj_type); +void nft_unregister_obj(struct nft_object_type *obj_type); + +/** * struct nft_traceinfo - nft tracing information and state * * @pkt: pktinfo currently processed @@ -965,6 +1070,9 @@ void nft_trace_notify(struct nft_traceinfo *info); #define MODULE_ALIAS_NFT_SET() \ MODULE_ALIAS("nft-set") +#define MODULE_ALIAS_NFT_OBJ(type) \ + MODULE_ALIAS("nft-obj-" __stringify(type)) + /* * The gencursor defines two generations, the currently active and the * next one. Objects contain a bitmask of 2 bits specifying the generations @@ -1141,4 +1249,11 @@ struct nft_trans_elem { #define nft_trans_elem(trans) \ (((struct nft_trans_elem *)trans->data)->elem) +struct nft_trans_obj { + struct nft_object *obj; +}; + +#define nft_trans_obj(trans) \ + (((struct nft_trans_obj *)trans->data)->obj) + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 00f4f6b1b1ba..8f690effec37 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -1,12 +1,18 @@ #ifndef _NET_NF_TABLES_CORE_H #define _NET_NF_TABLES_CORE_H +extern struct nft_expr_type nft_imm_type; +extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_lookup_type; +extern struct nft_expr_type nft_bitwise_type; +extern struct nft_expr_type nft_byteorder_type; +extern struct nft_expr_type nft_payload_type; +extern struct nft_expr_type nft_dynset_type; +extern struct nft_expr_type nft_range_type; + int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); -int nft_immediate_module_init(void); -void nft_immediate_module_exit(void); - struct nft_cmp_fast_expr { u32 data; enum nft_registers sreg:8; @@ -25,24 +31,6 @@ static inline u32 nft_cmp_fast_mask(unsigned int len) extern const struct nft_expr_ops nft_cmp_fast_ops; -int nft_cmp_module_init(void); -void nft_cmp_module_exit(void); - -int nft_range_module_init(void); -void nft_range_module_exit(void); - -int nft_lookup_module_init(void); -void nft_lookup_module_exit(void); - -int nft_dynset_module_init(void); -void nft_dynset_module_exit(void); - -int nft_bitwise_module_init(void); -void nft_bitwise_module_exit(void); - -int nft_byteorder_module_init(void); -void nft_byteorder_module_exit(void); - struct nft_payload { enum nft_payload_bases base:8; u8 offset; @@ -57,12 +45,10 @@ struct nft_payload_set { enum nft_registers sreg:8; u8 csum_type; u8 csum_offset; + u8 csum_flags; }; extern const struct nft_expr_ops nft_payload_fast_ops; extern struct static_key_false nft_trace_enabled; -int nft_payload_module_init(void); -void nft_payload_module_exit(void); - #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h new file mode 100644 index 000000000000..cbedda077db2 --- /dev/null +++ b/include/net/netfilter/nft_fib.h @@ -0,0 +1,31 @@ +#ifndef _NFT_FIB_H_ +#define _NFT_FIB_H_ + +struct nft_fib { + enum nft_registers dreg:8; + u8 result; + u32 flags; +}; + +extern const struct nla_policy nft_fib_policy[]; + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]); +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index); +#endif diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 79f45e19f31e..130e58361f99 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -1,19 +1,23 @@ #ifndef _XT_RATEEST_H #define _XT_RATEEST_H +#include <net/gen_stats.h> + struct xt_rateest { /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */ struct gnet_stats_basic_packed bstats; spinlock_t lock; - /* keep rstats and lock on same cache line to speedup xt_rateest_mt() */ - struct gnet_stats_rate_est64 rstats; + /* following fields not accessed in hot path */ + unsigned int refcnt; struct hlist_node list; char name[IFNAMSIZ]; - unsigned int refcnt; struct gnet_estimator params; struct rcu_head rcu; + + /* keep this field far away to speedup xt_rateest_mt() */ + struct net_rate_estimator __rcu *rate_est; }; struct xt_rateest *xt_rateest_lookup(const char *name); diff --git a/include/net/netlink.h b/include/net/netlink.h index 254a0fc01800..dd657a33f8c3 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -698,8 +698,7 @@ static inline int nla_len(const struct nlattr *nla) */ static inline int nla_ok(const struct nlattr *nla, int remaining) { - return remaining >= (int) sizeof(*nla) && - nla->nla_len >= sizeof(*nla) && + return nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } @@ -713,7 +712,7 @@ static inline int nla_ok(const struct nlattr *nla, int remaining) */ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) { - int totlen = NLA_ALIGN(nla->nla_len); + unsigned int totlen = NLA_ALIGN(nla->nla_len); *remaining -= totlen; return (struct nlattr *) ((char *) nla + totlen); @@ -1191,6 +1190,16 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) } /** + * nla_memdup - duplicate attribute memory (kmemdup) + * @src: netlink attribute to duplicate from + * @gfp: GFP mask + */ +static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp) +{ + return kmemdup(nla_data(src), nla_len(src), gfp); +} + +/** * nla_nest_start - Start a new level of nested attributes * @skb: socket buffer to add attributes to * @attrtype: attribute type of container diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e469e85de3f9..cf799fc3fdec 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -6,6 +6,12 @@ #include <linux/atomic.h> #include <linux/workqueue.h> #include <linux/netfilter/nf_conntrack_tcp.h> +#ifdef CONFIG_NF_CT_PROTO_DCCP +#include <linux/netfilter/nf_conntrack_dccp.h> +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +#include <linux/netfilter/nf_conntrack_sctp.h> +#endif #include <linux/seqlock.h> struct ctl_table_header; @@ -48,12 +54,49 @@ struct nf_icmp_net { unsigned int timeout; }; +#ifdef CONFIG_NF_CT_PROTO_DCCP +struct nf_dccp_net { + struct nf_proto_net pn; + int dccp_loose; + unsigned int dccp_timeout[CT_DCCP_MAX + 1]; +}; +#endif + +#ifdef CONFIG_NF_CT_PROTO_SCTP +struct nf_sctp_net { + struct nf_proto_net pn; + unsigned int timeouts[SCTP_CONNTRACK_MAX]; +}; +#endif + +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +enum udplite_conntrack { + UDPLITE_CT_UNREPLIED, + UDPLITE_CT_REPLIED, + UDPLITE_CT_MAX +}; + +struct nf_udplite_net { + struct nf_proto_net pn; + unsigned int timeouts[UDPLITE_CT_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; struct nf_udp_net udp; struct nf_icmp_net icmp; struct nf_icmp_net icmpv6; +#ifdef CONFIG_NF_CT_PROTO_DCCP + struct nf_dccp_net dccp; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + struct nf_sctp_net sctp; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + struct nf_udplite_net udplite; +#endif }; struct ct_pcpu { @@ -91,7 +134,6 @@ struct netns_ct { struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) unsigned int labels_used; - u8 label_words; #endif }; #endif diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 70e158551704..f15daaa89385 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -25,20 +25,24 @@ */ struct net_generic { - unsigned int len; - struct rcu_head rcu; - - void *ptr[0]; + union { + struct { + unsigned int len; + struct rcu_head rcu; + } s; + + void *ptr[0]; + }; }; -static inline void *net_generic(const struct net *net, int id) +static inline void *net_generic(const struct net *net, unsigned int id) { struct net_generic *ng; void *ptr; rcu_read_lock(); ng = rcu_dereference(net->gen); - ptr = ng->ptr[id - 1]; + ptr = ng->ptr[id]; rcu_read_unlock(); return ptr; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7adf4386ac8f..f0cf5a1b777e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -135,6 +135,9 @@ struct netns_ipv4 { #ifdef CONFIG_IP_ROUTE_MULTIPATH int sysctl_fib_multipath_use_neigh; #endif + + unsigned int fib_seq; /* protected by rtnl_mutex */ + atomic_t rt_genid; }; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 10d0848f5b8a..de7745e2edcc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -85,6 +85,7 @@ struct netns_ipv6 { #endif atomic_t dev_addr_genid; atomic_t fib6_sernum; + struct seg6_pernet_data *seg6_data; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 58487b1cc99a..cea396b53a60 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -17,5 +17,11 @@ struct netns_nf { struct ctl_table_header *nf_log_dir_header; #endif struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + bool defrag_ipv4; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + bool defrag_ipv6; +#endif }; #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 767b03a3fe67..f0a051480c6c 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -171,6 +171,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); +int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, + struct net_device **hw_dev); /** * struct tcf_pkt_info - packet information @@ -425,16 +427,14 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_should_offload(const struct net_device *dev, - const struct tcf_proto *tp, u32 flags) +static inline bool tc_can_offload(const struct net_device *dev, + const struct tcf_proto *tp) { const struct Qdisc *sch = tp->q; const struct Qdisc_class_ops *cops = sch->ops->cl_ops; if (!(dev->features & NETIF_F_HW_TC)) return false; - if (flags & TCA_CLS_FLAGS_SKIP_HW) - return false; if (!dev->netdev_ops->ndo_setup_tc) return false; if (cops && cops->tcf_cl_offload) @@ -443,6 +443,19 @@ static inline bool tc_should_offload(const struct net_device *dev, return true; } +static inline bool tc_skip_hw(u32 flags) +{ + return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; +} + +static inline bool tc_should_offload(const struct net_device *dev, + const struct tcf_proto *tp, u32 flags) +{ + if (tc_skip_hw(flags)) + return false; + return tc_can_offload(dev, tp); +} + static inline bool tc_skip_sw(u32 flags) { return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index cd334c9584e9..f1b76b8e6d2d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -6,6 +6,8 @@ #include <linux/if_vlan.h> #include <net/sch_generic.h> +#define DEFAULT_TX_QUEUE_LEN 1000 + struct qdisc_walker { int stop; int skip; diff --git a/include/net/raw.h b/include/net/raw.h index 3e789008394d..57c33dd22ec4 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -23,6 +23,12 @@ extern struct proto raw_prot; +extern struct raw_hashinfo raw_v4_hashinfo; +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, + unsigned short num, __be32 raddr, + __be32 laddr, int dif); + +int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); int raw_local_deliver(struct sk_buff *, int); diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 87783dea0791..cbe4e9de1894 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -3,6 +3,13 @@ #include <net/protocol.h> +extern struct raw_hashinfo raw_v6_hashinfo; +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, + unsigned short num, const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif); + +int raw_abort(struct sock *sk, int err); + void raw6_icmp_error(struct sk_buff *, int nexthdr, u8 type, u8 code, int inner_offset, __be32); bool raw6_local_deliver(struct sk_buff *, int); diff --git a/include/net/route.h b/include/net/route.h index 0429d47cad25..c0874c87c173 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -153,7 +153,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -269,7 +269,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e6aa0a249672..498f81b229a4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -76,7 +76,7 @@ struct Qdisc { struct netdev_queue *dev_queue; - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 31acc3f4f132..f0dcaebebddb 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -164,7 +164,7 @@ void sctp_backlog_migrate(struct sctp_association *assoc, struct sock *oldsk, struct sock *newsk); int sctp_transport_hashtable_init(void); void sctp_transport_hashtable_destroy(void); -void sctp_hash_transport(struct sctp_transport *t); +int sctp_hash_transport(struct sctp_transport *t); void sctp_unhash_transport(struct sctp_transport *t); struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 11c3bf262a85..92daabdc007d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -124,7 +124,7 @@ extern struct sctp_globals { /* This is the sctp port control hash. */ struct sctp_bind_hashbucket *port_hashtable; /* This is the hash of all transports. */ - struct rhashtable transport_hashtable; + struct rhltable transport_hashtable; /* Sizes of above hashtables. */ int ep_hashsize; @@ -530,7 +530,6 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - can_abandon:1, /* can chunks from this message can be abandoned. */ can_delay; /* should this message be Nagle delayed */ }; @@ -641,7 +640,6 @@ struct sctp_chunk { #define SCTP_NEED_FRTX 0x1 #define SCTP_DONT_FRTX 0x2 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ - resent:1, /* Has this chunk ever been resent. */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ singleton:1, /* Only chunk in the packet? */ @@ -656,6 +654,7 @@ struct sctp_chunk { fast_retransmit:2; /* Is this chunk fast retransmitted? */ }; +#define sctp_chunk_retransmitted(chunk) (chunk->sent_count > 1) void sctp_chunk_hold(struct sctp_chunk *); void sctp_chunk_put(struct sctp_chunk *); int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len, @@ -762,7 +761,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet) struct sctp_transport { /* A list of transports. */ struct list_head transports; - struct rhash_head node; + struct rhlist_head node; /* Reference counting. */ atomic_t refcnt; diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 3f36d45b714a..0caee631a836 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -6,10 +6,10 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport); +u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, u32 *tsoff); +u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport, u32 *tsoff); u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, diff --git a/include/net/seg6.h b/include/net/seg6.h new file mode 100644 index 000000000000..4e0357517d79 --- /dev/null +++ b/include/net/seg6.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_H +#define _NET_SEG6_H + +#include <linux/net.h> +#include <linux/ipv6.h> +#include <net/lwtunnel.h> +#include <linux/seg6.h> +#include <linux/rhashtable.h> + +static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, + __be32 to) +{ + __be32 diff[] = { ~from, to }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, + __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +struct seg6_pernet_data { + struct mutex lock; + struct in6_addr __rcu *tun_src; +#ifdef CONFIG_IPV6_SEG6_HMAC + struct rhashtable hmac_infos; +#endif +}; + +static inline struct seg6_pernet_data *seg6_pernet(struct net *net) +{ + return net->ipv6.seg6_data; +} + +extern int seg6_init(void); +extern void seg6_exit(void); +extern int seg6_iptunnel_init(void); +extern void seg6_iptunnel_exit(void); + +extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); + +#endif diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h new file mode 100644 index 000000000000..69c3a106056b --- /dev/null +++ b/include/net/seg6_hmac.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_HMAC_H +#define _NET_SEG6_HMAC_H + +#include <net/flow.h> +#include <net/ip6_fib.h> +#include <net/sock.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/route.h> +#include <net/seg6.h> +#include <linux/seg6_hmac.h> +#include <linux/rhashtable.h> + +#define SEG6_HMAC_MAX_DIGESTSIZE 160 +#define SEG6_HMAC_RING_SIZE 256 + +struct seg6_hmac_info { + struct rhash_head node; + struct rcu_head rcu; + + u32 hmackeyid; + char secret[SEG6_HMAC_SECRET_LEN]; + u8 slen; + u8 alg_id; +}; + +struct seg6_hmac_algo { + u8 alg_id; + char name[64]; + struct crypto_shash * __percpu *tfms; + struct shash_desc * __percpu *shashs; +}; + +extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo, + struct ipv6_sr_hdr *hdr, struct in6_addr *saddr, + u8 *output); +extern struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key); +extern int seg6_hmac_info_add(struct net *net, u32 key, + struct seg6_hmac_info *hinfo); +extern int seg6_hmac_info_del(struct net *net, u32 key); +extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh); +extern bool seg6_hmac_validate_skb(struct sk_buff *skb); +extern int seg6_hmac_init(void); +extern void seg6_hmac_exit(void); +extern int seg6_hmac_net_init(struct net *net); +extern void seg6_hmac_net_exit(struct net *net); + +#endif diff --git a/include/net/sock.h b/include/net/sock.h index 92b269709b9a..e17aa3de2b4d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -343,6 +343,9 @@ struct sock { #define sk_rxhash __sk_common.skc_rxhash socket_lock_t sk_lock; + atomic_t sk_drops; + int sk_rcvlowat; + struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -359,14 +362,13 @@ struct sock { struct sk_buff *tail; } sk_backlog; #define sk_rmem_alloc sk_backlog.rmem_alloc - int sk_forward_alloc; - __u32 sk_txhash; + int sk_forward_alloc; #ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int sk_napi_id; unsigned int sk_ll_usec; + /* ===== mostly read cache line ===== */ + unsigned int sk_napi_id; #endif - atomic_t sk_drops; int sk_rcvbuf; struct sk_filter __rcu *sk_filter; @@ -379,16 +381,50 @@ struct sock { #endif struct dst_entry *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; - /* Note: 32bit hole on 64bit arches */ - atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; + + /* ===== cache line for TX ===== */ + int sk_wmem_queued; + atomic_t sk_wmem_alloc; + unsigned long sk_tsq_flags; + struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; + __s32 sk_peek_off; + int sk_write_pending; + long sk_sndtimeo; + struct timer_list sk_timer; + __u32 sk_priority; + __u32 sk_mark; + u32 sk_pacing_rate; /* bytes per second */ + u32 sk_max_pacing_rate; + struct page_frag sk_frag; + netdev_features_t sk_route_caps; + netdev_features_t sk_route_nocaps; + int sk_gso_type; + unsigned int sk_gso_max_size; + gfp_t sk_allocation; + __u32 sk_txhash; /* * Because of non atomicity rules, all * changes are protected by socket lock. */ + unsigned int __sk_flags_offset[0]; +#ifdef __BIG_ENDIAN_BITFIELD +#define SK_FL_PROTO_SHIFT 16 +#define SK_FL_PROTO_MASK 0x00ff0000 + +#define SK_FL_TYPE_SHIFT 0 +#define SK_FL_TYPE_MASK 0x0000ffff +#else +#define SK_FL_PROTO_SHIFT 8 +#define SK_FL_PROTO_MASK 0x0000ff00 + +#define SK_FL_TYPE_SHIFT 16 +#define SK_FL_TYPE_MASK 0xffff0000 +#endif + kmemcheck_bitfield_begin(flags); unsigned int sk_padding : 2, sk_no_check_tx : 1, @@ -399,41 +435,24 @@ struct sock { #define SK_PROTOCOL_MAX U8_MAX kmemcheck_bitfield_end(flags); - int sk_wmem_queued; - gfp_t sk_allocation; - u32 sk_pacing_rate; /* bytes per second */ - u32 sk_max_pacing_rate; - netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; - int sk_gso_type; - unsigned int sk_gso_max_size; u16 sk_gso_max_segs; - int sk_rcvlowat; unsigned long sk_lingertime; - struct sk_buff_head sk_error_queue; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; - __u32 sk_priority; - __u32 sk_mark; + kuid_t sk_uid; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; - long sk_sndtimeo; - struct timer_list sk_timer; ktime_t sk_stamp; u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; struct socket *sk_socket; void *sk_user_data; - struct page_frag sk_frag; - struct sk_buff *sk_send_head; - __s32 sk_peek_off; - int sk_write_pending; #ifdef CONFIG_SECURITY void *sk_security; #endif @@ -894,7 +913,20 @@ static inline void sock_rps_record_flow_hash(__u32 hash) static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS - sock_rps_record_flow_hash(sk->sk_rxhash); + if (static_key_false(&rfs_needed)) { + /* Reading sk->sk_rxhash might incur an expensive cache line + * miss. + * + * TCP_ESTABLISHED does cover almost all states where RFS + * might be useful, and is cheaper [1] than testing : + * IPv4: inet_sk(sk)->inet_daddr + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) + * OR an additional socket flag + * [1] : sk_state and sk_prot are in the same cache line. + */ + if (sk->sk_state == TCP_ESTABLISHED) + sock_rps_record_flow_hash(sk->sk_rxhash); + } #endif } @@ -914,14 +946,16 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) #endif } -#define sk_wait_event(__sk, __timeo, __condition) \ +#define sk_wait_event(__sk, __timeo, __condition, __wait) \ ({ int __rc; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ - *(__timeo) = schedule_timeout(*(__timeo)); \ + *(__timeo) = wait_woken(__wait, \ + TASK_INTERRUPTIBLE, \ + *(__timeo)); \ } \ - sched_annotate_sleep(); \ + sched_annotate_sleep(); \ lock_sock(__sk); \ __rc = __condition; \ __rc; \ @@ -1162,11 +1196,6 @@ static inline void sk_enter_memory_pressure(struct sock *sk) sk->sk_prot->enter_memory_pressure(sk); } -static inline long sk_prot_mem_limits(const struct sock *sk, int index) -{ - return sk->sk_prot->sysctl_mem[index]; -} - static inline long sk_memory_allocated(const struct sock *sk) { @@ -1276,14 +1305,32 @@ static inline struct inode *SOCK_INODE(struct socket *socket) /* * Functions for memory accounting */ +int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind); int __sk_mem_schedule(struct sock *sk, int size, int kind); +void __sk_mem_reduce_allocated(struct sock *sk, int amount); void __sk_mem_reclaim(struct sock *sk, int amount); -#define SK_MEM_QUANTUM ((int)PAGE_SIZE) +/* We used to have PAGE_SIZE here, but systems with 64KB pages + * do not necessarily have 16x time more memory than 4KB ones. + */ +#define SK_MEM_QUANTUM 4096 #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) #define SK_MEM_SEND 0 #define SK_MEM_RECV 1 +/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ +static inline long sk_prot_mem_limits(const struct sock *sk, int index) +{ + long val = sk->sk_prot->sysctl_mem[index]; + +#if PAGE_SIZE > SK_MEM_QUANTUM + val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; +#elif PAGE_SIZE < SK_MEM_QUANTUM + val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT; +#endif + return val; +} + static inline int sk_mem_pages(int amt) { return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; @@ -1651,6 +1698,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); + sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1658,6 +1706,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) +{ + return sk ? sk->sk_uid : make_kuid(net->user_ns, 0); +} + static inline u32 net_tx_rndhash(void) { u32 v = prandom_u32(); @@ -1952,6 +2005,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, + unsigned int flags); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); @@ -2108,7 +2163,8 @@ struct sock_skb_cb { static inline void sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) { - SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops); + SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ? + atomic_read(&sk->sk_drops) : 0; } static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 62770add15bd..604bc31e23ab 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,13 +8,13 @@ struct tcf_mirred { struct tc_action common; int tcfm_eaction; int tcfm_ifindex; - int tcfm_ok_push; + bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; #define to_mirred(a) ((struct tcf_mirred *)a) -static inline bool is_tcf_mirred_redirect(const struct tc_action *a) +static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT if (a->ops && a->ops->type == TCA_ACT_MIRRED) @@ -23,7 +23,7 @@ static inline bool is_tcf_mirred_redirect(const struct tc_action *a) return false; } -static inline bool is_tcf_mirred_mirror(const struct tc_action *a) +static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT if (a->ops && a->ops->type == TCA_ACT_MIRRED) diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 5767e9dbcf92..19cd3d345804 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -27,6 +27,7 @@ struct tcf_skbedit { u32 flags; u32 priority; u32 mark; + u32 mask; u16 queue_mapping; u16 ptype; }; diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 253f8da6c2a6..efef0b4b1b2b 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -12,6 +12,8 @@ #define __NET_TC_TUNNEL_KEY_H #include <net/act_api.h> +#include <linux/tc_act/tc_tunnel_key.h> +#include <net/dst_metadata.h> struct tcf_tunnel_key_params { struct rcu_head rcu; @@ -27,4 +29,39 @@ struct tcf_tunnel_key { #define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) +static inline bool is_tcf_tunnel_set(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; +#endif + return false; +} + +static inline bool is_tcf_tunnel_release(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; +#endif + return false; +} + +static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + return ¶ms->tcft_enc_metadata->u.tun_info; +#else + return NULL; +#endif +} #endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 123979fe12bf..207147b4c6b2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -958,6 +958,7 @@ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); u32 tcp_reno_ssthresh(struct sock *sk); +u32 tcp_reno_undo_cwnd(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; @@ -1515,11 +1516,26 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; +/* Latencies incurred by various limits for a sender. They are + * chronograph-like stats that are mutually exclusive. + */ +enum tcp_chrono { + TCP_CHRONO_UNSPEC, + TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */ + TCP_CHRONO_RWND_LIMITED, /* Stalled by insufficient receive window */ + TCP_CHRONO_SNDBUF_LIMITED, /* Stalled by insufficient send buffer */ + __TCP_CHRONO_MAX, +}; + +void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); +void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { struct sk_buff *skb; + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) sk_wmem_free_skb(sk, skb); sk_mem_reclaim(sk); @@ -1578,8 +1594,10 @@ static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff * static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) { - if (sk->sk_send_head == skb_unlinked) + if (sk->sk_send_head == skb_unlinked) { sk->sk_send_head = NULL; + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + } if (tcp_sk(sk)->highest_sack == skb_unlinked) tcp_sk(sk)->highest_sack = NULL; } @@ -1601,6 +1619,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb /* Queue it, remembering where we must start sending. */ if (sk->sk_send_head == NULL) { sk->sk_send_head = skb; + tcp_chrono_start(sk, TCP_CHRONO_BUSY); if (tcp_sk(sk)->highest_sack == NULL) tcp_sk(sk)->highest_sack = skb; @@ -1808,7 +1827,7 @@ struct tcp_request_sock_ops { struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, const struct request_sock *req, bool *strict); - __u32 (*init_seq)(const struct sk_buff *skb); + __u32 (*init_seq)(const struct sk_buff *skb, u32 *tsoff); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, diff --git a/include/net/udp.h b/include/net/udp.h index 4948790d393d..1661791e8ca1 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -246,6 +246,25 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, } /* net/ipv4/udp.c */ +void udp_destruct_sock(struct sock *sk); +void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); +int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); +static inline struct sk_buff * +__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked, + int *off, int *err) +{ + return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + udp_skb_destructor, peeked, off, err); +} +static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *err) +{ + int peeked, off = 0; + + return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); +} + void udp_v4_early_demux(struct sk_buff *skb); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, @@ -258,6 +277,7 @@ void udp_flush_pending_frames(struct sock *sk); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int udp_init_sock(struct sock *sk); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); diff --git a/include/net/udplite.h b/include/net/udplite.h index 80761938b9a7..36097d388219 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -27,6 +27,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, static inline int udplite_sk_init(struct sock *sk) { udp_sk(sk)->pcflag = UDPLITE_BIT; + sk->sk_destruct = udp_destruct_sock; return 0; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 308adc4154f4..49a59202f85e 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -281,16 +281,6 @@ struct vxlan_dev { struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); -static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan, - unsigned short family) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6) - return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport; -#endif - return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport; -} - static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) { diff --git a/include/trace/events/alarmtimer.h b/include/trace/events/alarmtimer.h new file mode 100644 index 000000000000..a1c108c16c9c --- /dev/null +++ b/include/trace/events/alarmtimer.h @@ -0,0 +1,96 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM alarmtimer + +#if !defined(_TRACE_ALARMTIMER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ALARMTIMER_H + +#include <linux/alarmtimer.h> +#include <linux/rtc.h> +#include <linux/tracepoint.h> + +TRACE_DEFINE_ENUM(ALARM_REALTIME); +TRACE_DEFINE_ENUM(ALARM_BOOTTIME); +TRACE_DEFINE_ENUM(ALARM_REALTIME_FREEZER); +TRACE_DEFINE_ENUM(ALARM_BOOTTIME_FREEZER); + +#define show_alarm_type(type) __print_flags(type, " | ", \ + { 1 << ALARM_REALTIME, "REALTIME" }, \ + { 1 << ALARM_BOOTTIME, "BOOTTIME" }, \ + { 1 << ALARM_REALTIME_FREEZER, "REALTIME Freezer" }, \ + { 1 << ALARM_BOOTTIME_FREEZER, "BOOTTIME Freezer" }) + +TRACE_EVENT(alarmtimer_suspend, + + TP_PROTO(ktime_t expires, int flag), + + TP_ARGS(expires, flag), + + TP_STRUCT__entry( + __field(s64, expires) + __field(unsigned char, alarm_type) + ), + + TP_fast_assign( + __entry->expires = expires.tv64; + __entry->alarm_type = flag; + ), + + TP_printk("alarmtimer type:%s expires:%llu", + show_alarm_type((1 << __entry->alarm_type)), + __entry->expires + ) +); + +DECLARE_EVENT_CLASS(alarm_class, + + TP_PROTO(struct alarm *alarm, ktime_t now), + + TP_ARGS(alarm, now), + + TP_STRUCT__entry( + __field(void *, alarm) + __field(unsigned char, alarm_type) + __field(s64, expires) + __field(s64, now) + ), + + TP_fast_assign( + __entry->alarm = alarm; + __entry->alarm_type = alarm->type; + __entry->expires = alarm->node.expires.tv64; + __entry->now = now.tv64; + ), + + TP_printk("alarmtimer:%p type:%s expires:%llu now:%llu", + __entry->alarm, + show_alarm_type((1 << __entry->alarm_type)), + __entry->expires, + __entry->now + ) +); + +DEFINE_EVENT(alarm_class, alarmtimer_fired, + + TP_PROTO(struct alarm *alarm, ktime_t now), + + TP_ARGS(alarm, now) +); + +DEFINE_EVENT(alarm_class, alarmtimer_start, + + TP_PROTO(struct alarm *alarm, ktime_t now), + + TP_ARGS(alarm, now) +); + +DEFINE_EVENT(alarm_class, alarmtimer_cancel, + + TP_PROTO(struct alarm *alarm, ktime_t now), + + TP_ARGS(alarm, now) +); + +#endif /* _TRACE_ALARMTIMER_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/mdio.h b/include/trace/events/mdio.h new file mode 100644 index 000000000000..00d85f5f54e4 --- /dev/null +++ b/include/trace/events/mdio.h @@ -0,0 +1,42 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mdio + +#if !defined(_TRACE_MDIO_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MDIO_H + +#include <linux/tracepoint.h> + +TRACE_EVENT_CONDITION(mdio_access, + + TP_PROTO(struct mii_bus *bus, char read, + u8 addr, unsigned regnum, u16 val, int err), + + TP_ARGS(bus, read, addr, regnum, val, err), + + TP_CONDITION(err >= 0), + + TP_STRUCT__entry( + __array(char, busid, MII_BUS_ID_SIZE) + __field(char, read) + __field(u8, addr) + __field(u16, val) + __field(unsigned, regnum) + ), + + TP_fast_assign( + strncpy(__entry->busid, bus->id, MII_BUS_ID_SIZE); + __entry->read = read; + __entry->addr = addr; + __entry->regnum = regnum; + __entry->val = val; + ), + + TP_printk("%s %-5s phy:0x%02hhx reg:0x%02x val:0x%04hx", + __entry->busid, __entry->read ? "read" : "write", + __entry->addr, __entry->regnum, __entry->val) +); + +#endif /* if !defined(_TRACE_MDIO_H) || defined(TRACE_HEADER_MULTI_READ) */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d3e756539d44..9d4f9b3a2b7b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -698,7 +698,10 @@ TRACE_EVENT(rcu_batch_end, /* * Tracepoint for rcutorture readers. The first argument is the name * of the RCU flavor from rcutorture's viewpoint and the second argument - * is the callback address. + * is the callback address. The third argument is the start time in + * seconds, and the last two arguments are the grace period numbers + * at the beginning and end of the read, respectively. Note that the + * callback address can be NULL. */ TRACE_EVENT(rcu_torture_read, diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 67d632f1743d..2c748ddad5f8 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -92,4 +92,6 @@ #define SO_CNX_ADVICE 53 +#define SCM_TIMESTAMPING_OPT_STATS 54 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f09c70b97eca..0eb0e87dbe9f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -73,6 +73,8 @@ enum bpf_cmd { BPF_PROG_LOAD, BPF_OBJ_PIN, BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, }; enum bpf_map_type { @@ -85,6 +87,8 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, BPF_MAP_TYPE_CGROUP_ARRAY, + BPF_MAP_TYPE_LRU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH, }; enum bpf_prog_type { @@ -96,8 +100,22 @@ enum bpf_prog_type { BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_XDP, BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, }; +enum bpf_attach_type { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + __MAX_BPF_ATTACH_TYPE +}; + +#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -106,6 +124,13 @@ enum bpf_prog_type { #define BPF_EXIST 2 /* update existing element */ #define BPF_F_NO_PREALLOC (1U << 0) +/* Instead of having one common LRU list in the + * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list + * which can scale and perform better. + * Note, the LRU nodes (including free nodes) cannot be moved + * across different LRU lists. + */ +#define BPF_F_NO_COMMON_LRU (1U << 1) union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ @@ -141,293 +166,327 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + }; } __attribute__((aligned(8))); +/* BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(&map, &key) + * Return: Map value or NULL + * + * int bpf_map_update_elem(&map, &key, &value, flags) + * Return: 0 on success or negative error + * + * int bpf_map_delete_elem(&map, &key) + * Return: 0 on success or negative error + * + * int bpf_probe_read(void *dst, int size, void *src) + * Return: 0 on success or negative error + * + * u64 bpf_ktime_get_ns(void) + * Return: current ktime + * + * int bpf_trace_printk(const char *fmt, int fmt_size, ...) + * Return: length of buffer written or negative error + * + * u32 bpf_prandom_u32(void) + * Return: random value + * + * u32 bpf_raw_smp_processor_id(void) + * Return: SMP processor ID + * + * int bpf_skb_store_bytes(skb, offset, from, len, flags) + * store bytes into packet + * @skb: pointer to skb + * @offset: offset within packet from skb->mac_header + * @from: pointer where to copy bytes from + * @len: number of bytes to store into packet + * @flags: bit 0 - if true, recompute skb->csum + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l3_csum_replace(skb, offset, from, to, flags) + * recompute IP checksum + * @skb: pointer to skb + * @offset: offset within packet where IP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l4_csum_replace(skb, offset, from, to, flags) + * recompute TCP/UDP checksum + * @skb: pointer to skb + * @offset: offset within packet where TCP/UDP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * bit 4 - is pseudo header + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_tail_call(ctx, prog_array_map, index) + * jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success or negative error + * + * int bpf_clone_redirect(skb, ifindex, flags) + * redirect to another netdev + * @skb: pointer to skb + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: 0 on success or negative error + * + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + * + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + * + * int bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success or negative error + * + * u32 bpf_get_cgroup_classid(skb) + * retrieve a proc's classid + * @skb: pointer to skb + * Return: classid if != 0 + * + * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) + * Return: 0 on success or negative error + * + * int bpf_skb_vlan_pop(skb) + * Return: 0 on success or negative error + * + * int bpf_skb_get_tunnel_key(skb, key, size, flags) + * int bpf_skb_set_tunnel_key(skb, key, size, flags) + * retrieve or populate tunnel metadata + * @skb: pointer to skb + * @key: pointer to 'struct bpf_tunnel_key' + * @size: size of 'struct bpf_tunnel_key' + * @flags: room for future extensions + * Return: 0 on success or negative error + * + * u64 bpf_perf_event_read(&map, index) + * Return: Number events read or error code + * + * int bpf_redirect(ifindex, flags) + * redirect to another netdev + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: TC_ACT_REDIRECT + * + * u32 bpf_get_route_realm(skb) + * retrieve a dst's tclassid + * @skb: pointer to skb + * Return: realm if != 0 + * + * int bpf_perf_event_output(ctx, map, index, data, size) + * output perf raw sample + * @ctx: struct pt_regs* + * @map: pointer to perf_event_array map + * @index: index of event in the map + * @data: data on stack to be output as raw data + * @size: size of data + * Return: 0 on success or negative error + * + * int bpf_get_stackid(ctx, map, flags) + * walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + * + * s64 bpf_csum_diff(from, from_size, to, to_size, seed) + * calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result or negative error code + * + * int bpf_skb_get_tunnel_opt(skb, opt, size) + * retrieve tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: option size + * + * int bpf_skb_set_tunnel_opt(skb, opt, size) + * populate tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: 0 on success or negative error + * + * int bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is v4 -> v6, + * v6 -> v4 transitions. The helper will also resize the skb. eBPF + * program is expected to fill the new headers via skb_store_bytes + * and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + * + * int bpf_skb_under_cgroup(skb, map, index) + * Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + * + * u32 bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + * + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + * + * int bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + * + * int bpf_current_task_under_cgroup(map, index) + * Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + * + * int bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, to be used f.e. + * with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the skb is non-linear + * and not all of len are part of the linear section. Only needed for + * read/write with direct packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + * + * s64 bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + * + * void bpf_set_hash_invalid(skb) + * Invalidate current skb->hash. + * @skb: pointer to skb + * + * int bpf_get_numa_node_id() + * Return: Id of current NUMA node. + * + * int bpf_skb_change_head() + * Grows headroom of skb and adjusts MAC header offset accordingly. + * Will extends/reallocae as required automatically. + * May change skb data pointer and will thus invalidate any check + * performed for direct packet access. + * @skb: pointer to skb + * @len: length of header to be pushed in front + * @flags: Flags (unused for now) + * Return: 0 on success or negative error + * + * int bpf_xdp_adjust_head(xdp_md, delta) + * Adjust the xdp_md.data by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data + * Return: 0 on success or negative on error + */ +#define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ + FN(map_lookup_elem), \ + FN(map_update_elem), \ + FN(map_delete_elem), \ + FN(probe_read), \ + FN(ktime_get_ns), \ + FN(trace_printk), \ + FN(get_prandom_u32), \ + FN(get_smp_processor_id), \ + FN(skb_store_bytes), \ + FN(l3_csum_replace), \ + FN(l4_csum_replace), \ + FN(tail_call), \ + FN(clone_redirect), \ + FN(get_current_pid_tgid), \ + FN(get_current_uid_gid), \ + FN(get_current_comm), \ + FN(get_cgroup_classid), \ + FN(skb_vlan_push), \ + FN(skb_vlan_pop), \ + FN(skb_get_tunnel_key), \ + FN(skb_set_tunnel_key), \ + FN(perf_event_read), \ + FN(redirect), \ + FN(get_route_realm), \ + FN(perf_event_output), \ + FN(skb_load_bytes), \ + FN(get_stackid), \ + FN(csum_diff), \ + FN(skb_get_tunnel_opt), \ + FN(skb_set_tunnel_opt), \ + FN(skb_change_proto), \ + FN(skb_change_type), \ + FN(skb_under_cgroup), \ + FN(get_hash_recalc), \ + FN(get_current_task), \ + FN(probe_write_user), \ + FN(current_task_under_cgroup), \ + FN(skb_change_tail), \ + FN(skb_pull_data), \ + FN(csum_update), \ + FN(set_hash_invalid), \ + FN(get_numa_node_id), \ + FN(skb_change_head), \ + FN(xdp_adjust_head), + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ +#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x enum bpf_func_id { - BPF_FUNC_unspec, - BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ - BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ - BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ - BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ - BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ - BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ - BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ - BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ - - /** - * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet - * @skb: pointer to skb - * @offset: offset within packet from skb->mac_header - * @from: pointer where to copy bytes from - * @len: number of bytes to store into packet - * @flags: bit 0 - if true, recompute skb->csum - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_skb_store_bytes, - - /** - * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum - * @skb: pointer to skb - * @offset: offset within packet where IP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l3_csum_replace, - - /** - * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum - * @skb: pointer to skb - * @offset: offset within packet where TCP/UDP checksum is located - * @from: old value of header field - * @to: new value of header field - * @flags: bits 0-3 - size of header field - * bit 4 - is pseudo header - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_l4_csum_replace, - - /** - * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program - * @ctx: context pointer passed to next program - * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run - * Return: 0 on success - */ - BPF_FUNC_tail_call, - - /** - * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev - * @skb: pointer to skb - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: 0 on success - */ - BPF_FUNC_clone_redirect, - - /** - * u64 bpf_get_current_pid_tgid(void) - * Return: current->tgid << 32 | current->pid - */ - BPF_FUNC_get_current_pid_tgid, - - /** - * u64 bpf_get_current_uid_gid(void) - * Return: current_gid << 32 | current_uid - */ - BPF_FUNC_get_current_uid_gid, - - /** - * bpf_get_current_comm(char *buf, int size_of_buf) - * stores current->comm into buf - * Return: 0 on success - */ - BPF_FUNC_get_current_comm, - - /** - * bpf_get_cgroup_classid(skb) - retrieve a proc's classid - * @skb: pointer to skb - * Return: classid if != 0 - */ - BPF_FUNC_get_cgroup_classid, - BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ - BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ - - /** - * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) - * retrieve or populate tunnel metadata - * @skb: pointer to skb - * @key: pointer to 'struct bpf_tunnel_key' - * @size: size of 'struct bpf_tunnel_key' - * @flags: room for future extensions - * Retrun: 0 on success - */ - BPF_FUNC_skb_get_tunnel_key, - BPF_FUNC_skb_set_tunnel_key, - BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ - /** - * bpf_redirect(ifindex, flags) - redirect to another netdev - * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT - */ - BPF_FUNC_redirect, - - /** - * bpf_get_route_realm(skb) - retrieve a dst's tclassid - * @skb: pointer to skb - * Return: realm if != 0 - */ - BPF_FUNC_get_route_realm, - - /** - * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample - * @ctx: struct pt_regs* - * @map: pointer to perf_event_array map - * @index: index of event in the map - * @data: data on stack to be output as raw data - * @size: size of data - * Return: 0 on success - */ - BPF_FUNC_perf_event_output, - BPF_FUNC_skb_load_bytes, - - /** - * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id - * @ctx: struct pt_regs* - * @map: pointer to stack_trace map - * @flags: bits 0-7 - numer of stack frames to skip - * bit 8 - collect user stack instead of kernel - * bit 9 - compare stacks by hash only - * bit 10 - if two different stacks hash into the same stackid - * discard old - * other bits - reserved - * Return: >= 0 stackid on success or negative error - */ - BPF_FUNC_get_stackid, - - /** - * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff - * @from: raw from buffer - * @from_size: length of from buffer - * @to: raw to buffer - * @to_size: length of to buffer - * @seed: optional seed - * Return: csum result - */ - BPF_FUNC_csum_diff, - - /** - * bpf_skb_[gs]et_tunnel_opt(skb, opt, size) - * retrieve or populate tunnel options metadata - * @skb: pointer to skb - * @opt: pointer to raw tunnel option data - * @size: size of @opt - * Return: 0 on success for set, option size for get - */ - BPF_FUNC_skb_get_tunnel_opt, - BPF_FUNC_skb_set_tunnel_opt, - - /** - * bpf_skb_change_proto(skb, proto, flags) - * Change protocol of the skb. Currently supported is - * v4 -> v6, v6 -> v4 transitions. The helper will also - * resize the skb. eBPF program is expected to fill the - * new headers via skb_store_bytes and lX_csum_replace. - * @skb: pointer to skb - * @proto: new skb->protocol type - * @flags: reserved - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_proto, - - /** - * bpf_skb_change_type(skb, type) - * Change packet type of skb. - * @skb: pointer to skb - * @type: new skb->pkt_type type - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_type, - - /** - * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb - * @skb: pointer to skb - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 skb failed the cgroup2 descendant test - * == 1 skb succeeded the cgroup2 descendant test - * < 0 error - */ - BPF_FUNC_skb_under_cgroup, - - /** - * bpf_get_hash_recalc(skb) - * Retrieve and possibly recalculate skb->hash. - * @skb: pointer to skb - * Return: hash - */ - BPF_FUNC_get_hash_recalc, - - /** - * u64 bpf_get_current_task(void) - * Returns current task_struct - * Return: current - */ - BPF_FUNC_get_current_task, - - /** - * bpf_probe_write_user(void *dst, void *src, int len) - * safely attempt to write to a location - * @dst: destination address in userspace - * @src: source address on stack - * @len: number of bytes to copy - * Return: 0 on success or negative error - */ - BPF_FUNC_probe_write_user, - - /** - * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task - * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type - * @index: index of the cgroup in the bpf_map - * Return: - * == 0 current failed the cgroup2 descendant test - * == 1 current succeeded the cgroup2 descendant test - * < 0 error - */ - BPF_FUNC_current_task_under_cgroup, - - /** - * bpf_skb_change_tail(skb, len, flags) - * The helper will resize the skb to the given new size, - * to be used f.e. with control messages. - * @skb: pointer to skb - * @len: new skb length - * @flags: reserved - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_change_tail, - - /** - * bpf_skb_pull_data(skb, len) - * The helper will pull in non-linear data in case the - * skb is non-linear and not all of len are part of the - * linear section. Only needed for read/write with direct - * packet access. - * @skb: pointer to skb - * @len: len to make read/writeable - * Return: 0 on success or negative error - */ - BPF_FUNC_skb_pull_data, - - /** - * bpf_csum_update(skb, csum) - * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. - * @skb: pointer to skb - * @csum: csum to add - * Return: csum on success or negative error - */ - BPF_FUNC_csum_update, - - /** - * bpf_set_hash_invalid(skb) - * Invalidate current skb>hash. - * @skb: pointer to skb - */ - BPF_FUNC_set_hash_invalid, - + __BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; +#undef __BPF_ENUM_FN /* All flags used by eBPF helper functions, placed here. */ @@ -501,6 +560,31 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* Generic BPF return codes which all BPF program types may support. + * The values are binary compatible with their TC_ACT_* counter-part to + * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT + * programs. + * + * XDP is handled seprately, see XDP_*. + */ +enum bpf_ret_code { + BPF_OK = 0, + /* 1 reserved */ + BPF_DROP = 2, + /* 3-6 reserved */ + BPF_REDIRECT = 7, + /* >127 are reserved for prog type specific return codes */ +}; + +struct bpf_sock { + __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; +}; + +#define XDP_PACKET_HEADROOM 256 + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will result diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 9692cda5f8fc..c48d93a28d1a 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -196,5 +196,6 @@ struct can_filter { }; #define CAN_INV_FILTER 0x20000000U /* to be set in can_filter.can_id */ +#define CAN_RAW_FILTER_MAX 512 /* maximum number of can_filter set via setsockopt() */ #endif /* !_UAPI_CAN_H */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 915bfa74458c..9014c33d4e77 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -102,6 +102,13 @@ enum devlink_eswitch_mode { DEVLINK_ESWITCH_MODE_SWITCHDEV, }; +enum devlink_eswitch_inline_mode { + DEVLINK_ESWITCH_INLINE_MODE_NONE, + DEVLINK_ESWITCH_INLINE_MODE_LINK, + DEVLINK_ESWITCH_INLINE_MODE_NETWORK, + DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -133,6 +140,7 @@ enum devlink_attr { DEVLINK_ATTR_SB_OCC_CUR, /* u32 */ DEVLINK_ATTR_SB_OCC_MAX, /* u32 */ DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ + DEVLINK_ATTR_ESWITCH_INLINE_MODE, /* u8 */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 8e547231c1b7..f0db7788f887 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -248,6 +248,19 @@ struct ethtool_tunable { void *data[0]; }; +#define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff +#define DOWNSHIFT_DEV_DISABLE 0 + +enum phy_tunable_id { + ETHTOOL_PHY_ID_UNSPEC, + ETHTOOL_PHY_DOWNSHIFT, + /* + * Add your fresh new phy tunable attribute above and remember to update + * phy_tunable_strings[] in net/core/ethtool.c + */ + __ETHTOOL_PHY_TUNABLE_COUNT, +}; + /** * struct ethtool_regs - hardware register dump * @cmd: Command number = %ETHTOOL_GREGS @@ -548,6 +561,7 @@ struct ethtool_pauseparam { * @ETH_SS_FEATURES: Device feature names * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS + * @ETH_SS_PHY_TUNABLES: PHY tunable names */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -558,6 +572,7 @@ enum ethtool_stringset { ETH_SS_RSS_HASH_FUNCS, ETH_SS_TUNABLES, ETH_SS_PHY_STATS, + ETH_SS_PHY_TUNABLES, }; /** @@ -1313,7 +1328,8 @@ struct ethtool_per_queue_op { #define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */ #define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ - +#define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */ +#define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 14404b3ebb89..bbf02a63a011 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -29,6 +29,11 @@ struct fib_rule_hdr { __u32 flags; }; +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -51,6 +56,7 @@ enum { FRA_OIFNAME, FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ __FRA_MAX }; diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 5512c90af7e3..adc899381e0d 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -26,10 +26,11 @@ struct genlmsghdr { /* * List of reserved static generic netlink identifiers: */ -#define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) #define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) /************************************************************************** * Controller diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index e601c8c3bdc7..1158a043342a 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -31,7 +31,7 @@ #include <linux/hdlc/ioctl.h> /* For glibc compatibility. An empty enum does not compile. */ -#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && \ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || \ __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 /** * enum net_device_flags - &struct net_device flags @@ -99,7 +99,7 @@ enum net_device_flags { IFF_ECHO = 1<<18, /* volatile */ #endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ }; -#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ /* for compatibility with glibc net/if.h */ #if __UAPI_DEF_IF_NET_DEVICE_FLAGS diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 117d02e0fc31..3e5185e9ef03 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -35,6 +35,9 @@ #define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */ #define ETH_FCS_LEN 4 /* Octets in the FCS */ +#define ETH_MIN_MTU 68 /* Min IPv4 MTU per RFC791 */ +#define ETH_MAX_MTU 0xFFFFU /* 65535, same as IP_MAX_MTU */ + /* * These are the defined Ethernet Protocol ID's. */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b4fba662cd32..6b13e591abc9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -275,6 +275,8 @@ enum { IFLA_BR_PAD, IFLA_BR_VLAN_STATS_ENABLED, IFLA_BR_MCAST_STATS_ENABLED, + IFLA_BR_MCAST_IGMP_VERSION, + IFLA_BR_MCAST_MLD_VERSION, __IFLA_BR_MAX, }; @@ -874,10 +876,14 @@ enum { /* XDP section */ +#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) +#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST) + enum { IFLA_XDP_UNSPEC, IFLA_XDP_FD, IFLA_XDP_ATTACHED, + IFLA_XDP_FLAGS, __IFLA_XDP_MAX, }; diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h index 4bd1f55d6377..6418c4d10241 100644 --- a/include/uapi/linux/if_pppol2tp.h +++ b/include/uapi/linux/if_pppol2tp.h @@ -18,6 +18,7 @@ #include <linux/types.h> #include <linux/in.h> #include <linux/in6.h> +#include <linux/l2tp.h> /* Structure used to connect() the socket to a particular tunnel UDP * socket over IPv4. @@ -90,14 +91,12 @@ enum { PPPOL2TP_SO_REORDERTO = 5, }; -/* Debug message categories for the DEBUG socket option */ +/* Debug message categories for the DEBUG socket option (deprecated) */ enum { - PPPOL2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if - * compiled in) */ - PPPOL2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel - * interface */ - PPPOL2TP_MSG_SEQ = (1 << 2), /* sequence numbers */ - PPPOL2TP_MSG_DATA = (1 << 3), /* data packets */ + PPPOL2TP_MSG_DEBUG = L2TP_MSG_DEBUG, + PPPOL2TP_MSG_CONTROL = L2TP_MSG_CONTROL, + PPPOL2TP_MSG_SEQ = L2TP_MSG_SEQ, + PPPOL2TP_MSG_DATA = L2TP_MSG_DATA, }; diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index eaf94919291a..4e557f4e9553 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -117,6 +117,7 @@ struct in_addr { #define IP_NODEFRAG 22 #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 +#define IP_RECVFRAGSIZE 25 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index b39ea4f2e701..46444f8fbee4 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -283,6 +283,7 @@ struct in6_flowlabel_req { #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR #define IPV6_TRANSPARENT 75 #define IPV6_UNICAST_IF 76 +#define IPV6_RECVFRAGSIZE 77 /* * Multicast Routing: diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 509cd961068d..bbe201047df6 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -43,6 +43,23 @@ struct inet_diag_req_v2 { struct inet_diag_sockid id; }; +/* + * SOCK_RAW sockets require the underlied protocol to be + * additionally specified so we can use @pad member for + * this, but we can't rename it because userspace programs + * still may depend on this name. Instead lets use another + * structure definition as an alias for struct + * @inet_diag_req_v2. + */ +struct inet_diag_req_raw { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u8 idiag_ext; + __u8 sdiag_raw_protocol; + __u32 idiag_states; + struct inet_diag_sockid id; +}; + enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index d6d071fc3c56..3af60ee69053 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -640,7 +640,7 @@ * Control a data application associated with the currently viewed channel, * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ -#define KEY_DATA 0x275 +#define KEY_DATA 0x277 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 8c2772340c3f..eaf65dc82e22 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -39,6 +39,7 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ +#define IPV6_SRCRT_TYPE_4 4 /* Segment Routing with IPv6 */ /* * routing header @@ -178,6 +179,9 @@ enum { DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_RTR_SOLICIT_MAX_INTERVAL, + DEVCONF_SEG6_ENABLED, + DEVCONF_SEG6_REQUIRE_HMAC, + DEVCONF_ENHANCED_DAD, DEVCONF_MAX }; diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 4bd27d0270a2..85ddb74fcd1c 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -108,7 +108,7 @@ enum { L2TP_ATTR_VLAN_ID, /* u16 */ L2TP_ATTR_COOKIE, /* 0, 4 or 8 bytes */ L2TP_ATTR_PEER_COOKIE, /* 0, 4 or 8 bytes */ - L2TP_ATTR_DEBUG, /* u32 */ + L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags */ L2TP_ATTR_RECV_SEQ, /* u8 */ L2TP_ATTR_SEND_SEQ, /* u8 */ L2TP_ATTR_LNS_MODE, /* u8 */ @@ -124,8 +124,8 @@ enum { L2TP_ATTR_STATS, /* nested */ L2TP_ATTR_IP6_SADDR, /* struct in6_addr */ L2TP_ATTR_IP6_DADDR, /* struct in6_addr */ - L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* u8 */ - L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* u8 */ + L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* flag */ + L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* flag */ L2TP_ATTR_PAD, __L2TP_ATTR_MAX, }; @@ -175,6 +175,21 @@ enum l2tp_seqmode { L2TP_SEQ_ALL = 2, }; +/** + * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions + * + * @L2TP_MSG_DEBUG: verbose debug (if compiled in) + * @L2TP_MSG_CONTROL: userspace - kernel interface + * @L2TP_MSG_SEQ: sequence numbers + * @L2TP_MSG_DATA: data packets + */ +enum l2tp_debug_flags { + L2TP_MSG_DEBUG = (1 << 0), + L2TP_MSG_CONTROL = (1 << 1), + L2TP_MSG_SEQ = (1 << 2), + L2TP_MSG_DATA = (1 << 3), +}; + /* * NETLINK_GENERIC related info */ diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index a478fe80e203..92724cba1eba 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -9,6 +9,8 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_IP, LWTUNNEL_ENCAP_ILA, LWTUNNEL_ENCAP_IP6, + LWTUNNEL_ENCAP_SEG6, + LWTUNNEL_ENCAP_BPF, __LWTUNNEL_ENCAP_MAX, }; @@ -42,4 +44,26 @@ enum lwtunnel_ip6_t { #define LWTUNNEL_IP6_MAX (__LWTUNNEL_IP6_MAX - 1) +enum { + LWT_BPF_PROG_UNSPEC, + LWT_BPF_PROG_FD, + LWT_BPF_PROG_NAME, + __LWT_BPF_PROG_MAX, +}; + +#define LWT_BPF_PROG_MAX (__LWT_BPF_PROG_MAX - 1) + +enum { + LWT_BPF_UNSPEC, + LWT_BPF_IN, + LWT_BPF_OUT, + LWT_BPF_XMIT, + LWT_BPF_XMIT_HEADROOM, + __LWT_BPF_MAX, +}; + +#define LWT_BPF_MAX (__LWT_BPF_MAX - 1) + +#define LWT_BPF_MAX_HEADROOM 256 + #endif /* _UAPI_LWTUNNEL_H_ */ diff --git a/include/uapi/linux/major.h b/include/uapi/linux/major.h index 620252e69b44..19e195bee990 100644 --- a/include/uapi/linux/major.h +++ b/include/uapi/linux/major.h @@ -3,7 +3,7 @@ /* * This file has definitions for major device numbers. - * For the device number assignments, see Documentation/devices.txt. + * For the device number assignments, see Documentation/admin-guide/devices.rst. */ #define UNNAMED_MAJOR 0 diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 264e515de16f..464dcca5ed68 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -25,8 +25,9 @@ enum { SOF_TIMESTAMPING_TX_ACK = (1<<9), SOF_TIMESTAMPING_OPT_CMSG = (1<<10), SOF_TIMESTAMPING_OPT_TSONLY = (1<<11), + SOF_TIMESTAMPING_OPT_STATS = (1<<12), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index d93f949d1d9a..7550e9176a54 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -13,7 +13,7 @@ #define NF_STOLEN 2 #define NF_QUEUE 3 #define NF_REPEAT 4 -#define NF_STOP 5 +#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */ #define NF_MAX_VERDICT NF_STOP /* we overload the higher bits for encoding auxiliary data such as the queue diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index cd26d7a0fd07..03f194aeadc5 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h +header-y += nf_log.h header-y += nf_tables.h header-y += nf_tables_compat.h header-y += nf_nat.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h index a9c3834abdd4..526b42496b78 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h @@ -2,7 +2,10 @@ #define _NF_CONNTRACK_TUPLE_COMMON_H #include <linux/types.h> +#ifndef __KERNEL__ #include <linux/netfilter.h> +#endif +#include <linux/netfilter/nf_conntrack_common.h> /* IP_CT_IS_REPLY */ enum ip_conntrack_dir { IP_CT_DIR_ORIGINAL, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c6c4477c136b..881d49e94569 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -4,6 +4,7 @@ #define NFT_TABLE_MAXNAMELEN 32 #define NFT_CHAIN_MAXNAMELEN 32 #define NFT_SET_MAXNAMELEN 32 +#define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 /** @@ -85,6 +86,10 @@ enum nft_verdicts { * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes) + * @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) + * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -105,6 +110,10 @@ enum nf_tables_msg_types { NFT_MSG_NEWGEN, NFT_MSG_GETGEN, NFT_MSG_TRACE, + NFT_MSG_NEWOBJ, + NFT_MSG_GETOBJ, + NFT_MSG_DELOBJ, + NFT_MSG_GETOBJ_RESET, NFT_MSG_MAX, }; @@ -246,6 +255,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_MAP: set is used as a dictionary * @NFT_SET_TIMEOUT: set uses timeouts * @NFT_SET_EVAL: set contains expressions for evaluation + * @NFT_SET_OBJECT: set contains stateful objects */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, @@ -254,6 +264,7 @@ enum nft_set_flags { NFT_SET_MAP = 0x8, NFT_SET_TIMEOUT = 0x10, NFT_SET_EVAL = 0x20, + NFT_SET_OBJECT = 0x40, }; /** @@ -295,6 +306,7 @@ enum nft_set_desc_attributes { * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) * @NFTA_SET_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -312,6 +324,7 @@ enum nft_set_attributes { NFTA_SET_GC_INTERVAL, NFTA_SET_USERDATA, NFTA_SET_PAD, + NFTA_SET_OBJ_TYPE, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) @@ -335,6 +348,7 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING) */ enum nft_set_elem_attributes { NFTA_SET_ELEM_UNSPEC, @@ -346,6 +360,7 @@ enum nft_set_elem_attributes { NFTA_SET_ELEM_USERDATA, NFTA_SET_ELEM_EXPR, NFTA_SET_ELEM_PAD, + NFTA_SET_ELEM_OBJREF, __NFTA_SET_ELEM_MAX }; #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) @@ -659,6 +674,10 @@ enum nft_payload_csum_types { NFT_PAYLOAD_CSUM_INET, }; +enum nft_payload_csum_flags { + NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0), +}; + /** * enum nft_payload_attributes - nf_tables payload expression netlink attributes * @@ -669,6 +688,7 @@ enum nft_payload_csum_types { * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers) * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32) * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_CSUM_FLAGS: checksum flags (NLA_U32) */ enum nft_payload_attributes { NFTA_PAYLOAD_UNSPEC, @@ -679,6 +699,7 @@ enum nft_payload_attributes { NFTA_PAYLOAD_SREG, NFTA_PAYLOAD_CSUM_TYPE, NFTA_PAYLOAD_CSUM_OFFSET, + NFTA_PAYLOAD_CSUM_FLAGS, __NFTA_PAYLOAD_MAX }; #define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) @@ -759,6 +780,19 @@ enum nft_meta_keys { }; /** + * enum nft_rt_keys - nf_tables routing expression keys + * + * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) + * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 + * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + */ +enum nft_rt_keys { + NFT_RT_CLASSID, + NFT_RT_NEXTHOP4, + NFT_RT_NEXTHOP6, +}; + +/** * enum nft_hash_attributes - nf_tables hash expression netlink attributes * * @NFTA_HASH_SREG: source register (NLA_U32) @@ -797,6 +831,20 @@ enum nft_meta_attributes { #define NFTA_META_MAX (__NFTA_META_MAX - 1) /** + * enum nft_rt_attributes - nf_tables routing expression netlink attributes + * + * @NFTA_RT_DREG: destination register (NLA_U32) + * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys) + */ +enum nft_rt_attributes { + NFTA_RT_UNSPEC, + NFTA_RT_DREG, + NFTA_RT_KEY, + __NFTA_RT_MAX +}; +#define NFTA_RT_MAX (__NFTA_RT_MAX - 1) + +/** * enum nft_ct_keys - nf_tables ct expression keys * * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) @@ -941,6 +989,7 @@ enum nft_queue_attributes { enum nft_quota_flags { NFT_QUOTA_F_INV = (1 << 0), + NFT_QUOTA_F_DEPLETED = (1 << 1), }; /** @@ -948,12 +997,14 @@ enum nft_quota_flags { * * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16) * @NFTA_QUOTA_FLAGS: flags (NLA_U32) + * @NFTA_QUOTA_CONSUMED: quota already consumed in bytes (NLA_U64) */ enum nft_quota_attributes { NFTA_QUOTA_UNSPEC, NFTA_QUOTA_BYTES, NFTA_QUOTA_FLAGS, NFTA_QUOTA_PAD, + NFTA_QUOTA_CONSUMED, __NFTA_QUOTA_MAX }; #define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1) @@ -1098,6 +1149,26 @@ enum nft_fwd_attributes { #define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1) /** + * enum nft_objref_attributes - nf_tables stateful object expression netlink attributes + * + * @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: nft_register) + * @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING) + * @NFTA_OBJREF_SET_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_OBJREF_SET_NAME: name of the set where to look for (NLA_STRING) + * @NFTA_OBJREF_SET_ID: id of the set where to look for in this transaction (NLA_U32) + */ +enum nft_objref_attributes { + NFTA_OBJREF_UNSPEC, + NFTA_OBJREF_IMM_TYPE, + NFTA_OBJREF_IMM_NAME, + NFTA_OBJREF_SET_SREG, + NFTA_OBJREF_SET_NAME, + NFTA_OBJREF_SET_ID, + __NFTA_OBJREF_MAX +}; +#define NFTA_OBJREF_MAX (__NFTA_OBJREF_MAX - 1) + +/** * enum nft_gen_attributes - nf_tables ruleset generation attributes * * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32) @@ -1109,6 +1180,68 @@ enum nft_gen_attributes { }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) +/* + * enum nft_fib_attributes - nf_tables fib expression netlink attributes + * + * @NFTA_FIB_DREG: destination register (NLA_U32) + * @NFTA_FIB_RESULT: desired result (NLA_U32) + * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32) + * + * The FIB expression performs a route lookup according + * to the packet data. + */ +enum nft_fib_attributes { + NFTA_FIB_UNSPEC, + NFTA_FIB_DREG, + NFTA_FIB_RESULT, + NFTA_FIB_FLAGS, + __NFTA_FIB_MAX +}; +#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1) + +enum nft_fib_result { + NFT_FIB_RESULT_UNSPEC, + NFT_FIB_RESULT_OIF, + NFT_FIB_RESULT_OIFNAME, + NFT_FIB_RESULT_ADDRTYPE, + __NFT_FIB_RESULT_MAX +}; +#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1) + +enum nft_fib_flags { + NFTA_FIB_F_SADDR = 1 << 0, /* look up src */ + NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */ + NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */ + NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */ + NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ +}; + +#define NFT_OBJECT_UNSPEC 0 +#define NFT_OBJECT_COUNTER 1 +#define NFT_OBJECT_QUOTA 2 +#define __NFT_OBJECT_MAX 3 +#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) + +/** + * enum nft_object_attributes - nf_tables stateful object netlink attributes + * + * @NFTA_OBJ_TABLE: name of the table containing the expression (NLA_STRING) + * @NFTA_OBJ_NAME: name of this expression type (NLA_STRING) + * @NFTA_OBJ_TYPE: stateful object type (NLA_U32) + * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) + * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) + */ +enum nft_object_attributes { + NFTA_OBJ_UNSPEC, + NFTA_OBJ_TABLE, + NFTA_OBJ_NAME, + NFTA_OBJ_TYPE, + NFTA_OBJ_DATA, + NFTA_OBJ_USE, + __NFTA_OBJ_MAX +}; +#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) + /** * enum nft_trace_attributes - nf_tables trace netlink attributes * diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 1fad2c27ac32..b97725af2ac0 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -2,9 +2,11 @@ #define _XT_BPF_H #include <linux/filter.h> +#include <linux/limits.h> #include <linux/types.h> #define XT_BPF_MAX_NUM_INSTR 64 +#define XT_BPF_PATH_MAX (XT_BPF_MAX_NUM_INSTR * sizeof(struct sock_filter)) struct bpf_prog; @@ -16,4 +18,23 @@ struct xt_bpf_info { struct bpf_prog *filter __attribute__((aligned(8))); }; +enum xt_bpf_modes { + XT_BPF_MODE_BYTECODE, + XT_BPF_MODE_FD_PINNED, + XT_BPF_MODE_FD_ELF, +}; + +struct xt_bpf_info_v1 { + __u16 mode; + __u16 bpf_program_num_elem; + __s32 fd; + union { + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; + char path[XT_BPF_PATH_MAX]; + }; + + /* only used in the kernel */ + struct bpf_prog *filter __attribute__((aligned(8))); +}; + #endif /*_XT_BPF_H */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 56368e9b4622..6b76e3b0c18e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -323,7 +323,7 @@ * @NL80211_CMD_GET_SCAN: get scan results * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the - * probe requests at CCK rate or not. %NL80211_ATTR_MAC can be used to + * probe requests at CCK rate or not. %NL80211_ATTR_BSSID can be used to * specify a BSSID to scan for; if not included, the wildcard BSSID will * be used. * @NL80211_CMD_NEW_SCAN_RESULTS: scan notification (as a reply to @@ -600,6 +600,20 @@ * * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface. * + * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform + * multicast to unicast conversion. When enabled, all multicast packets + * with ethertype ARP, IPv4 or IPv6 (possibly within an 802.1Q header) + * will be sent out to each station once with the destination (multicast) + * MAC address replaced by the station's MAC address. Note that this may + * break certain expectations of the receiver, e.g. the ability to drop + * unicast IP packets encapsulated in multicast L2 frames, or the ability + * to not send destination unreachable messages in such cases. + * This can only be toggled per BSS. Configure this on an interface of + * type %NL80211_IFTYPE_AP. It applies to all its VLAN interfaces + * (%NL80211_IFTYPE_AP_VLAN), except for those in 4addr (WDS) mode. + * If %NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED is not present with this + * command, the feature is disabled. + * * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial * mesh config parameters may be given. * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the @@ -874,6 +888,12 @@ * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and * %NL80211_ATTR_COOKIE. * + * @NL80211_CMD_UPDATE_CONNECT_PARAMS: Update one or more connect parameters + * for subsequent roaming cases if the driver or firmware uses internal + * BSS selection. This command can be issued only while connected and it + * does not result in a change for the current association. Currently, + * only the %NL80211_ATTR_IE data is used and updated with this command. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1069,6 +1089,10 @@ enum nl80211_commands { NL80211_CMD_CHANGE_NAN_CONFIG, NL80211_CMD_NAN_MATCH, + NL80211_CMD_SET_MULTICAST_TO_UNICAST, + + NL80211_CMD_UPDATE_CONNECT_PARAMS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1638,8 +1662,16 @@ enum nl80211_commands { * the connection request from a station. nl80211_connect_failed_reason * enum has different reasons of connection failure. * - * @NL80211_ATTR_SAE_DATA: SAE elements in Authentication frames. This starts - * with the Authentication transaction sequence number field. + * @NL80211_ATTR_AUTH_DATA: Fields and elements in Authentication frames. + * This contains the authentication frame body (non-IE and IE data), + * excluding the Authentication algorithm number, i.e., starting at the + * Authentication transaction sequence number field. It is used with + * authentication algorithms that need special fields to be added into + * the frames (SAE and FILS). Currently, only the SAE cases use the + * initial two fields (Authentication transaction sequence number and + * Status code). However, those fields are included in the attribute data + * for all authentication algorithms to keep the attribute definition + * consistent. * * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) @@ -1936,6 +1968,17 @@ enum nl80211_commands { * attribute. * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. * See &enum nl80211_nan_match_attributes. + * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame + * protection. + * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association + * Request/Response frame protection. This attribute contains the 16 octet + * STA Nonce followed by 16 octets of AP Nonce. + * + * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast + * packets should be send out as unicast to all stations (flag attribute). + * + * @NL80211_ATTR_BSSID: The BSSID of the AP. Note that %NL80211_ATTR_MAC is also + * used in various commands/events for specifying the BSSID. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2195,7 +2238,7 @@ enum nl80211_attrs { NL80211_ATTR_CONN_FAILED_REASON, - NL80211_ATTR_SAE_DATA, + NL80211_ATTR_AUTH_DATA, NL80211_ATTR_VHT_CAPABILITY, @@ -2336,6 +2379,13 @@ enum nl80211_attrs { NL80211_ATTR_NAN_FUNC, NL80211_ATTR_NAN_MATCH, + NL80211_ATTR_FILS_KEK, + NL80211_ATTR_FILS_NONCES, + + NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED, + + NL80211_ATTR_BSSID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2347,6 +2397,7 @@ enum nl80211_attrs { #define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION #define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG #define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER +#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA /* * Allow user space programs to use #ifdef on new attributes by defining them @@ -3660,6 +3711,9 @@ enum nl80211_bss_status { * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals + * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key + * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS + * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key * @__NL80211_AUTHTYPE_NUM: internal * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by @@ -3672,6 +3726,9 @@ enum nl80211_auth_type { NL80211_AUTHTYPE_FT, NL80211_AUTHTYPE_NETWORK_EAP, NL80211_AUTHTYPE_SAE, + NL80211_AUTHTYPE_FILS_SK, + NL80211_AUTHTYPE_FILS_SK_PFS, + NL80211_AUTHTYPE_FILS_PK, /* keep last */ __NL80211_AUTHTYPE_NUM, @@ -4280,6 +4337,9 @@ enum nl80211_iface_limit_attrs { * of supported channel widths for radar detection. * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap * of supported regulatory regions for radar detection. + * @NL80211_IFACE_COMB_BI_MIN_GCD: u32 attribute specifying the minimum GCD of + * different beacon intervals supported by all the interface combinations + * in this group (if not present, all beacon intervals be identical). * @NUM_NL80211_IFACE_COMB: number of attributes * @MAX_NL80211_IFACE_COMB: highest attribute number * @@ -4287,8 +4347,8 @@ enum nl80211_iface_limit_attrs { * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2 * => allows an AP and a STA that must match BIs * - * numbers = [ #{AP, P2P-GO} <= 8 ], channels = 1, max = 8 - * => allows 8 of AP/GO + * numbers = [ #{AP, P2P-GO} <= 8 ], BI min gcd, channels = 1, max = 8, + * => allows 8 of AP/GO that can have BI gcd >= min gcd * * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 * => allows two STAs on different channels @@ -4314,6 +4374,7 @@ enum nl80211_if_combination_attrs { NL80211_IFACE_COMB_NUM_CHANNELS, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + NL80211_IFACE_COMB_BI_MIN_GCD, /* keep last */ NUM_NL80211_IFACE_COMB, @@ -4634,6 +4695,8 @@ enum nl80211_feature_flags { * configuration (AP/mesh) with HT rates. * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate * configuration (AP/mesh) with VHT rates. + * @NL80211_EXT_FEATURE_FILS_STA: This driver supports Fast Initial Link Setup + * with user space SME (NL80211_CMD_AUTHENTICATE) in station mode. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4648,6 +4711,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, NL80211_EXT_FEATURE_BEACON_RATE_HT, NL80211_EXT_FEATURE_BEACON_RATE_VHT, + NL80211_EXT_FEATURE_FILS_STA, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 59ed3992c760..375d812fea36 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -705,6 +705,15 @@ enum ovs_nat_attr { #define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1) +/* + * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument. + * @addresses: Source and destination MAC addresses. + * @eth_type: Ethernet type + */ +struct ovs_action_push_eth { + struct ovs_key_ethernet addresses; +}; + /** * enum ovs_action_attr - Action types. * @@ -738,6 +747,10 @@ enum ovs_nat_attr { * is no MPLS label stack, as determined by ethertype, no action is taken. * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related * entries in the flow key. + * @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the + * packet. + * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the + * packet. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -765,6 +778,8 @@ enum ovs_action_attr { * bits. */ OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */ OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */ + OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ + OVS_ACTION_ATTR_POP_ETH, /* No argument. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 8fd715f806a2..cb4bcdc58543 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -397,6 +397,7 @@ enum { TCA_BPF_NAME, TCA_BPF_FLAGS, TCA_BPF_FLAGS_GEN, + TCA_BPF_DIGEST, __TCA_BPF_MAX, }; @@ -447,11 +448,38 @@ enum { TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST, /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ + + TCA_FLOWER_KEY_FLAGS, /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */ + __TCA_FLOWER_MAX, }; #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) +enum { + TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), +}; + /* Match-all classifier */ enum { diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5a78be518101..e14377f2ec27 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -318,6 +318,7 @@ enum rtattr_type_t { RTA_ENCAP, RTA_EXPIRES, RTA_PAD, + RTA_UID, __RTA_MAX }; diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h new file mode 100644 index 000000000000..c396a8052f73 --- /dev/null +++ b/include/uapi/linux/seg6.h @@ -0,0 +1,54 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_H +#define _UAPI_LINUX_SEG6_H + +/* + * SRH + */ +struct ipv6_sr_hdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 type; + __u8 segments_left; + __u8 first_segment; + __u8 flag_1; + __u8 flag_2; + __u8 reserved; + + struct in6_addr segments[0]; +}; + +#define SR6_FLAG1_CLEANUP (1 << 7) +#define SR6_FLAG1_PROTECTED (1 << 6) +#define SR6_FLAG1_OAM (1 << 5) +#define SR6_FLAG1_ALERT (1 << 4) +#define SR6_FLAG1_HMAC (1 << 3) + +#define SR6_TLV_INGRESS 1 +#define SR6_TLV_EGRESS 2 +#define SR6_TLV_OPAQUE 3 +#define SR6_TLV_PADDING 4 +#define SR6_TLV_HMAC 5 + +#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP) +#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC) + +struct sr6_tlv { + __u8 type; + __u8 len; + __u8 data[0]; +}; + +#endif diff --git a/include/uapi/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h new file mode 100644 index 000000000000..fcf1c60d7df3 --- /dev/null +++ b/include/uapi/linux/seg6_genl.h @@ -0,0 +1,32 @@ +#ifndef _UAPI_LINUX_SEG6_GENL_H +#define _UAPI_LINUX_SEG6_GENL_H + +#define SEG6_GENL_NAME "SEG6" +#define SEG6_GENL_VERSION 0x1 + +enum { + SEG6_ATTR_UNSPEC, + SEG6_ATTR_DST, + SEG6_ATTR_DSTLEN, + SEG6_ATTR_HMACKEYID, + SEG6_ATTR_SECRET, + SEG6_ATTR_SECRETLEN, + SEG6_ATTR_ALGID, + SEG6_ATTR_HMACINFO, + __SEG6_ATTR_MAX, +}; + +#define SEG6_ATTR_MAX (__SEG6_ATTR_MAX - 1) + +enum { + SEG6_CMD_UNSPEC, + SEG6_CMD_SETHMAC, + SEG6_CMD_DUMPHMAC, + SEG6_CMD_SET_TUNSRC, + SEG6_CMD_GET_TUNSRC, + __SEG6_CMD_MAX, +}; + +#define SEG6_CMD_MAX (__SEG6_CMD_MAX - 1) + +#endif diff --git a/include/uapi/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h new file mode 100644 index 000000000000..b652dfd51bc5 --- /dev/null +++ b/include/uapi/linux/seg6_hmac.h @@ -0,0 +1,21 @@ +#ifndef _UAPI_LINUX_SEG6_HMAC_H +#define _UAPI_LINUX_SEG6_HMAC_H + +#include <linux/seg6.h> + +#define SEG6_HMAC_SECRET_LEN 64 +#define SEG6_HMAC_FIELD_LEN 32 + +struct sr6_tlv_hmac { + struct sr6_tlv tlvhdr; + __u16 reserved; + __be32 hmackeyid; + __u8 hmac[SEG6_HMAC_FIELD_LEN]; +}; + +enum { + SEG6_HMAC_ALGO_SHA1 = 1, + SEG6_HMAC_ALGO_SHA256 = 2, +}; + +#endif diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..0f7dbd280a9c --- /dev/null +++ b/include/uapi/linux/seg6_iptunnel.h @@ -0,0 +1,44 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_IPTUNNEL_H +#define _UAPI_LINUX_SEG6_IPTUNNEL_H + +enum { + SEG6_IPTUNNEL_UNSPEC, + SEG6_IPTUNNEL_SRH, + __SEG6_IPTUNNEL_MAX, +}; +#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1) + +struct seg6_iptunnel_encap { + int mode; + struct ipv6_sr_hdr srh[0]; +}; + +#define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3)) + +enum { + SEG6_IPTUN_MODE_INLINE, + SEG6_IPTUN_MODE_ENCAP, +}; + +static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) +{ + int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP); + + return ((tuninfo->srh->hdrlen + 1) << 3) + + (encap * sizeof(struct ipv6hdr)); +} + +#endif diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h index 8e7890b26d9a..79d029d25310 100644 --- a/include/uapi/linux/sockios.h +++ b/include/uapi/linux/sockios.h @@ -24,6 +24,8 @@ #define SIOCINQ FIONREAD #define SIOCOUTQ TIOCOUTQ /* output queue size (not sent + not acked) */ +#define SOCK_IOC_TYPE 0x89 + /* Routing table calls. */ #define SIOCADDRT 0x890B /* add routing table entry */ #define SIOCDELRT 0x890C /* delete routing table entry */ @@ -84,6 +86,7 @@ #define SIOCWANDEV 0x894A /* get/set netdev parameters */ #define SIOCOUTQNSD 0x894B /* output queue size (not sent only) */ +#define SIOCGSKNS 0x894C /* get socket network namespace */ /* ARP cache control calls. */ /* 0x8950 - 0x8952 * obsolete calls, don't re-use */ diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index e3969bd939e4..e3db7403296f 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -11,3 +11,5 @@ header-y += tc_vlan.h header-y += tc_bpf.h header-y += tc_connmark.h header-y += tc_ife.h +header-y += tc_tunnel_key.h +header-y += tc_skbmod.h diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 063d9d465119..a6b88a6f7f71 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -27,6 +27,7 @@ enum { TCA_ACT_BPF_FD, TCA_ACT_BPF_NAME, TCA_ACT_BPF_PAD, + TCA_ACT_BPF_DIGEST, __TCA_ACT_BPF_MAX, }; #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index a4d00c608d8f..2884425738ce 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -28,6 +28,7 @@ #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 #define SKBEDIT_F_PTYPE 0x8 +#define SKBEDIT_F_MASK 0x10 struct tc_skbedit { tc_gen; @@ -42,6 +43,7 @@ enum { TCA_SKBEDIT_MARK, TCA_SKBEDIT_PAD, TCA_SKBEDIT_PTYPE, + TCA_SKBEDIT_MASK, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index 890106ff16e6..84ea55e1076b 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -33,6 +33,7 @@ enum { TCA_TUNNEL_KEY_ENC_IPV6_DST, /* struct in6_addr */ TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ TCA_TUNNEL_KEY_PAD, + TCA_TUNNEL_KEY_ENC_DST_PORT, /* be16 */ __TCA_TUNNEL_KEY_MAX, }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 73ac0db487f8..c53de2691cec 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -214,6 +214,18 @@ struct tcp_info { __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ __u64 tcpi_delivery_rate; + + __u64 tcpi_busy_time; /* Time (usec) busy sending data */ + __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ + __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ +}; + +/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ +enum { + TCP_NLA_PAD, + TCP_NLA_BUSY, /* Time (usec) busy sending data */ + TCP_NLA_RWND_LIMITED, /* Time (usec) limited by receive window */ + TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */ }; /* for TCP_MD5SIG socket option */ |