From 306f7aa1807be7588f115d7cafa475f65e72e3d1 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 11 Feb 2015 14:39:15 +0100 Subject: ieee802154: correct ieee802154_is_valid_psdu_len This patch corrects the ieee802154_is_valid_psdu_len function that this function also checks on reserved values 6-8 for validation the psdu length. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 6e82d888287c..40b0ab953937 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -28,7 +28,8 @@ #include #define IEEE802154_MTU 127 -#define IEEE802154_MIN_PSDU_LEN 5 +#define IEEE802154_ACK_PSDU_LEN 5 +#define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff @@ -204,11 +205,18 @@ enum { /** * ieee802154_is_valid_psdu_len - check if psdu len is valid + * available lengths: + * 0-4 Reserved + * 5 MPDU (Acknowledgment) + * 6-8 Reserved + * 9-127 MPDU + * * @len: psdu len with (MHR + payload + MFR) */ static inline bool ieee802154_is_valid_psdu_len(const u8 len) { - return (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU); + return (len == IEEE802154_ACK_PSDU_LEN || + (len >= IEEE802154_MIN_PSDU_LEN && len <= IEEE802154_MTU)); } /** -- cgit v1.2.3 From 959d10f6bbf6ab5b8813c4e37540a2e43ca2ae96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Feb 2015 03:19:24 -0800 Subject: igmp: add __ip_mc_{join|leave}_group() There is a need to perform igmp join/leave operations while RTNL is held. Make ip_mc_{join|leave}_group() wrappers around __ip_mc_{join|leave}_group() to avoid the proliferation of work queues. For example, vxlan_igmp_join() could possibly be removed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 2c677afeea47..b5a6470e686c 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -111,7 +111,9 @@ struct ip_mc_list { extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); extern int igmp_rcv(struct sk_buff *); +extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); +extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, -- cgit v1.2.3 From 059a2440fd3cf4ec57735db2c0a90401cde84fca Mon Sep 17 00:00:00 2001 From: Bojan Prtvar Date: Sun, 22 Feb 2015 11:46:35 +0100 Subject: net: Remove state argument from skb_find_text() Although it is clear that textsearch state is intentionally passed to skb_find_text() as uninitialized argument, it was never used by the callers. Therefore, we can simplify skb_find_text() by making it local variable. Signed-off-by: Bojan Prtvar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 30007afe70b3..d898b32dedcc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -870,8 +870,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, void skb_abort_seq_read(struct skb_seq_state *st); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state); + unsigned int to, struct ts_config *config); /* * Packet hash types specify the type of hash in skb_set_hash. -- cgit v1.2.3 From ccdaeb2b176f7db491a6f8e8b1c51f9393525f7d Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 27 Feb 2015 09:58:26 +0100 Subject: at86rf230: add support for external xtal trim This patch adds support for setting the xtal trim register. Some at86rf2xx transceiver boards needs fine tuning the xtal capacitor. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/spi/at86rf230.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h index cd519a11c2c6..b63fe6f5fdc8 100644 --- a/include/linux/spi/at86rf230.h +++ b/include/linux/spi/at86rf230.h @@ -22,6 +22,7 @@ struct at86rf230_platform_data { int rstn; int slp_tr; int dig2; + u8 xtal_trim; }; #endif -- cgit v1.2.3 From 6588af614e7b79294fbcd4a666a7422c0c854e80 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 26 Feb 2015 19:34:37 +0000 Subject: usbnet: Fix tx_packets stat for FLAG_MULTI_FRAME drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the usbnet core does not update the tx_packets statistic for drivers with FLAG_MULTI_PACKET and there is no hook in the TX completion path where they could do this. cdc_ncm and dependent drivers are bumping tx_packets stat on the transmit path while asix and sr9800 aren't updating it at all. Add a packet count in struct skb_data so these drivers can fill it in, initialise it to 1 for other drivers, and add the packet count to the tx_packets statistic on completion. Signed-off-by: Ben Hutchings Tested-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index d9a4905e01d0..ff3fb2bd0e90 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -228,8 +228,20 @@ struct skb_data { /* skb->cb is one of these */ struct usbnet *dev; enum skb_state state; size_t length; + unsigned long packets; }; +/* Drivers that set FLAG_MULTI_PACKET must call this in their + * tx_fixup method before returning an skb. + */ +static inline void +usbnet_set_skb_tx_stats(struct sk_buff *skb, unsigned long packets) +{ + struct skb_data *entry = (struct skb_data *) skb->cb; + + entry->packets = packets; +} + extern int usbnet_open(struct net_device *net); extern int usbnet_stop(struct net_device *net); extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, -- cgit v1.2.3 From 5f852eb536ad651b8734559dcf4353514cb0bea3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Feb 2015 14:10:18 -0800 Subject: tcp: tso: remove tp->tso_deferred TSO relies on ability to defer sending a small amount of packets. Heuristic is to wait for future ACKS in hope to send more packets at once. Current algorithm uses a per socket tso_deferred field as a pseudo timer. This pseudo timer relies on future ACK, but there is no guarantee we receive them in time. Fix would be to use a real timer, but cost of such timer is probably too expensive for typical cases. This patch changes the logic to test the time of last transmit, because we should not add bursts of more than 1ms for any given flow. We've used this patch for about two years at Google, before FQ/pacing as it would reduce a fair amount of bursts. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1a7adb411647..97dbf16f7d9d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -236,7 +236,6 @@ struct tcp_sock { u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ u32 fackets_out; /* FACK'd packets */ - u32 tso_deferred; /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; -- cgit v1.2.3 From a2c83fff582ae133d9f5bb187404ea9ce4da1f96 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:42 +0100 Subject: ebpf: constify various function pointer structs We can move bpf_map_ops and bpf_verifier_ops and other structs into ro section, bpf_map_type_list and bpf_prog_type_list into read mostly. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bbfceb756452..78446860f796 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -32,13 +32,13 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; - struct bpf_map_ops *ops; + const struct bpf_map_ops *ops; struct work_struct work; }; struct bpf_map_type_list { struct list_head list_node; - struct bpf_map_ops *ops; + const struct bpf_map_ops *ops; enum bpf_map_type type; }; @@ -109,7 +109,7 @@ struct bpf_verifier_ops { struct bpf_prog_type_list { struct list_head list_node; - struct bpf_verifier_ops *ops; + const struct bpf_verifier_ops *ops; enum bpf_prog_type type; }; @@ -121,7 +121,7 @@ struct bpf_prog_aux { atomic_t refcnt; bool is_gpl_compatible; enum bpf_prog_type prog_type; - struct bpf_verifier_ops *ops; + const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; u32 used_map_cnt; struct bpf_prog *prog; @@ -138,8 +138,8 @@ struct bpf_prog *bpf_prog_get(u32 ufd); int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); /* verifier prototypes for helper functions called from eBPF programs */ -extern struct bpf_func_proto bpf_map_lookup_elem_proto; -extern struct bpf_func_proto bpf_map_update_elem_proto; -extern struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_map_lookup_elem_proto; +extern const struct bpf_func_proto bpf_map_update_elem_proto; +extern const struct bpf_func_proto bpf_map_delete_elem_proto; #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From f1a66f85b74c5ef7b503f746ea97742dacd56419 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:43 +0100 Subject: ebpf: export BPF_PSEUDO_MAP_FD to uapi We need to export BPF_PSEUDO_MAP_FD to user space, as it's used in the ELF BPF loader where instructions are being loaded that need map fixups. An initial stage loads all maps into the kernel, and later on replaces related instructions in the eBPF blob with BPF_PSEUDO_MAP_FD as source register and the actual fd as immediate value. The kernel verifier recognizes this keyword and replaces the map fd with a real pointer internally. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index caac2087a4d5..5e3863d5f666 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -145,8 +145,6 @@ struct bpf_prog_aux; .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) -#define BPF_PSEUDO_MAP_FD 1 - /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) -- cgit v1.2.3 From 0fc174dea54546e2b1146e1197da1b6d4bc48107 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:44 +0100 Subject: ebpf: make internal bpf API independent of CONFIG_BPF_SYSCALL ifdefs Socket filter code and other subsystems with upcoming eBPF support should not need to deal with the fact that we have CONFIG_BPF_SYSCALL defined or not. Having the bpf syscall as a config option is a nice thing and I'd expect it to stay that way for expert users (I presume one day the default setting of it might change, though), but code making use of it should not care if it's actually enabled or not. Instead, hide this via header files and let the rest deal with it. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 78446860f796..9c458144cdb4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -113,8 +113,6 @@ struct bpf_prog_type_list { enum bpf_prog_type type; }; -void bpf_register_prog_type(struct bpf_prog_type_list *tl); - struct bpf_prog; struct bpf_prog_aux { @@ -129,11 +127,25 @@ struct bpf_prog_aux { }; #ifdef CONFIG_BPF_SYSCALL +void bpf_register_prog_type(struct bpf_prog_type_list *tl); + void bpf_prog_put(struct bpf_prog *prog); +struct bpf_prog *bpf_prog_get(u32 ufd); #else -static inline void bpf_prog_put(struct bpf_prog *prog) {} +static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) +{ +} + +static inline struct bpf_prog *bpf_prog_get(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void bpf_prog_put(struct bpf_prog *prog) +{ +} #endif -struct bpf_prog *bpf_prog_get(u32 ufd); + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); -- cgit v1.2.3 From 24701ecea76b0b93bd9667486934ec310825f558 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 1 Mar 2015 12:31:47 +0100 Subject: ebpf: move read-only fields to bpf_prog and shrink bpf_prog_aux is_gpl_compatible and prog_type should be moved directly into bpf_prog as they stay immutable during bpf_prog's lifetime, are core attributes and they can be locked as read-only later on via bpf_prog_select_runtime(). With a bit of rearranging, this also allows us to shrink bpf_prog_aux to exactly 1 cacheline. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 +--- include/linux/filter.h | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9c458144cdb4..a1a7ff2df328 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -117,11 +117,9 @@ struct bpf_prog; struct bpf_prog_aux { atomic_t refcnt; - bool is_gpl_compatible; - enum bpf_prog_type prog_type; + u32 used_map_cnt; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; - u32 used_map_cnt; struct bpf_prog *prog; struct work_struct work; }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 5e3863d5f666..9ee8c67ea249 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -308,9 +308,11 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ bool jited; /* Is our filter JIT'ed? */ + bool gpl_compatible; /* Is our filter GPL compatible? */ u32 len; /* Number of filter blocks */ - struct sock_fprog_kern *orig_prog; /* Original BPF program */ + enum bpf_prog_type type; /* Type of BPF program */ struct bpf_prog_aux *aux; /* Auxiliary fields */ + struct sock_fprog_kern *orig_prog; /* Original BPF program */ unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); /* Instructions for interpreter */ -- cgit v1.2.3 From 287f3a943fef58c5c73e42545169443be379222f Mon Sep 17 00:00:00 2001 From: Simon Farnsworth Date: Sun, 1 Mar 2015 10:54:39 +0000 Subject: pppoe: Use workqueue to die properly when a PADT is received When a PADT frame is received, the socket may not be in a good state to close down the PPP interface. The current implementation handles this by simply blocking all further PPP traffic, and hoping that the lack of traffic will trigger the user to investigate. Use schedule_work to get to a process context from which we clear down the PPP interface, in a fashion analogous to hangup on a TTY-based PPP interface. This causes pppd to disconnect immediately, and allows tools to take immediate corrective action. Note that pppd's rp_pppoe.so plugin has code in it to disable the session when it disconnects; however, as a consequence of this patch, the session is already disabled before rp_pppoe.so is asked to disable the session. The result is a harmless error message: Failed to disconnect PPPoE socket: 114 Operation already in progress This message is safe to ignore, as long as the error is 114 Operation already in progress; in that specific case, it means that the PPPoE session has already been disabled before pppd tried to disable it. Signed-off-by: Simon Farnsworth Tested-by: Dan Williams Tested-by: Christoph Schulz Signed-off-by: David S. Miller --- include/linux/if_pppox.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index aff7ad8a4ea3..66a7d7600f43 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -19,6 +19,7 @@ #include #include #include +#include #include static inline struct pppoe_hdr *pppoe_hdr(const struct sk_buff *skb) @@ -32,6 +33,7 @@ struct pppoe_opt { struct pppoe_addr pa; /* what this socket is bound to*/ struct sockaddr_pppox relay; /* what socket data will be relayed to (PPPoE relaying) */ + struct work_struct padt_work;/* Work item for handling PADT */ }; struct pptp_opt { -- cgit v1.2.3 From 744d5a3e9fe2690dd85d9991dbb078301694658b Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sun, 1 Mar 2015 14:58:31 +0200 Subject: net: move skb->dropcount to skb->cb[] Commit 977750076d98 ("af_packet: add interframe drop cmsg (v6)") unionized skb->mark and skb->dropcount in order to allow recording of the socket drop count while maintaining struct sk_buff size. skb->dropcount was introduced since there was no available room in skb->cb[] in packet sockets. However, its introduction led to the inability to export skb->mark, or any other aliased field to userspace if so desired. Moving the dropcount metric to skb->cb[] eliminates this problem at the expense of 4 bytes less in skb->cb[] for protocol families using it. Signed-off-by: Eyal Birger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d898b32dedcc..bba1330757c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -492,7 +492,6 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark - * @dropcount: total number of sk_receive_queue overflows * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) @@ -641,7 +640,6 @@ struct sk_buff { #endif union { __u32 mark; - __u32 dropcount; __u32 reserved_tailroom; }; -- cgit v1.2.3 From 4186721d02b71ae943e60bbf50d3488fd5fd6adb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 8 Feb 2015 17:11:47 +0100 Subject: bcma: add helpers bringing PCIe hosted bus up / down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bringing PCIe hosted bus up requires operating on host-related core. Since we plan to support PCIe Gen 2 devices we should provide a helper picking the correct one (PCIE or PCIE2). Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 3 +++ include/linux/bcma/bcma_driver_pci.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 994739da827f..037620b3f113 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -434,6 +434,9 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, return bcma_find_core_unit(bus, coreid, 0); } +extern void bcma_host_pci_up(struct bcma_bus *bus); +extern void bcma_host_pci_down(struct bcma_bus *bus); + extern bool bcma_core_is_enabled(struct bcma_device *core); extern void bcma_core_disable(struct bcma_device *core, u32 flags); extern int bcma_core_enable(struct bcma_device *core, u32 flags); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 3f809ae372c4..23af893e9b86 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -242,8 +242,6 @@ extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); -extern void bcma_core_pci_up(struct bcma_bus *bus); -extern void bcma_core_pci_down(struct bcma_bus *bus); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); -- cgit v1.2.3 From 5b6ff664c8959d715e785b9465b042407a5d87a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 8 Feb 2015 17:11:48 +0100 Subject: bcma: change IRQ control function to accept bus as an argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It doesn't operate on PCI core, but PCI host device, so there is no point of passing core related struct. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 23af893e9b86..6b8bca67851f 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -240,7 +240,7 @@ struct bcma_drv_pci { extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); extern void bcma_core_pci_init(struct bcma_drv_pci *pc); -extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, +extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core, bool enable); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); -- cgit v1.2.3 From 804e27dee49e20c0addd1b7276654220cc3768ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 8 Feb 2015 17:11:49 +0100 Subject: bcma: support bringing up bus hosted on PCIe Gen 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pcie2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pcie2.h b/include/linux/bcma/bcma_driver_pcie2.h index 5988b05781c3..d8c43294c527 100644 --- a/include/linux/bcma/bcma_driver_pcie2.h +++ b/include/linux/bcma/bcma_driver_pcie2.h @@ -143,6 +143,8 @@ struct bcma_drv_pcie2 { struct bcma_device *core; + + u16 reqsize; }; #define pcie2_read16(pcie2, offset) bcma_read16((pcie2)->core, offset) -- cgit v1.2.3 From 1b784140474e4fc94281a49e96c67d29df0efbde Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 2 Mar 2015 15:37:48 +0800 Subject: net: Remove iocb argument from sendmsg and recvmsg After TIPC doesn't depend on iocb argument in its internal implementations of sendmsg() and recvmsg() hooks defined in proto structure, no any user is using iocb argument in them at all now. Then we can drop the redundant iocb argument completely from kinds of implementations of both sendmsg() and recvmsg() in the entire networking stack. Cc: Christoph Hellwig Suggested-by: Al Viro Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- include/linux/net.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 17d83393afcc..e74114bcca68 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -120,7 +120,6 @@ struct socket { struct vm_area_struct; struct page; -struct kiocb; struct sockaddr; struct msghdr; struct module; @@ -162,8 +161,8 @@ struct proto_ops { int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); #endif - int (*sendmsg) (struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len); + int (*sendmsg) (struct socket *sock, struct msghdr *m, + size_t total_len); /* Notes for implementing recvmsg: * =============================== * msg->msg_namelen should get updated by the recvmsg handlers @@ -172,9 +171,8 @@ struct proto_ops { * handlers can assume that msg.msg_name is either NULL or has * a minimum size of sizeof(struct sockaddr_storage). */ - int (*recvmsg) (struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, - int flags); + int (*recvmsg) (struct socket *sock, struct msghdr *m, + size_t total_len, int flags); int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, -- cgit v1.2.3 From 61e021f3b86cbbcc04cbe8ac7b7da2b8c94b5e8e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 2 Mar 2015 15:21:55 +0100 Subject: ebpf: move CONFIG_BPF_SYSCALL-only function declarations Masami noted that it would be better to hide the remaining CONFIG_BPF_SYSCALL-only function declarations within the BPF header ifdef, w/o else path dummy alternatives since these functions are not supposed to have a user outside of CONFIG_BPF_SYSCALL. Suggested-by: Masami Hiramatsu Reference: http://article.gmane.org/gmane.linux.kernel.api/8658 Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a1a7ff2df328..a884f5a2c503 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -42,10 +42,6 @@ struct bpf_map_type_list { enum bpf_map_type type; }; -void bpf_register_map_type(struct bpf_map_type_list *tl); -void bpf_map_put(struct bpf_map *map); -struct bpf_map *bpf_map_get(struct fd f); - /* function argument constraints */ enum bpf_arg_type { ARG_ANYTHING = 0, /* any argument is ok */ @@ -126,9 +122,16 @@ struct bpf_prog_aux { #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); +void bpf_register_map_type(struct bpf_map_type_list *tl); -void bpf_prog_put(struct bpf_prog *prog); struct bpf_prog *bpf_prog_get(u32 ufd); +void bpf_prog_put(struct bpf_prog *prog); + +struct bpf_map *bpf_map_get(struct fd f); +void bpf_map_put(struct bpf_map *map); + +/* verify correctness of eBPF program */ +int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); #else static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) { @@ -142,10 +145,7 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } -#endif - -/* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); +#endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; -- cgit v1.2.3 From d476059e77d1af48453a58f9de1e36f2eaff6450 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 2 Mar 2015 00:11:09 -0600 Subject: net: Kill dev_rebuild_header Now that there are no more users kill dev_rebuild_header and all of it's implementations. This is long overdue. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 1 - include/linux/netdevice.h | 12 +----------- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 1d869d185a0d..606563ef8a72 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -35,7 +35,6 @@ extern const struct header_ops eth_header_ops; int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned len); -int eth_rebuild_header(struct sk_buff *skb); int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5897b4ea5a3f..2007f3b44d05 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -261,7 +261,6 @@ struct header_ops { unsigned short type, const void *daddr, const void *saddr, unsigned int len); int (*parse)(const struct sk_buff *skb, unsigned char *haddr); - int (*rebuild)(struct sk_buff *skb); int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, @@ -1346,7 +1345,7 @@ enum netdev_priv_flags { * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations * @fwd_ops: Management operations - * @header_ops: Includes callbacks for creating,parsing,rebuilding,etc + * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * * @flags: Interface flags (a la BSD) @@ -2399,15 +2398,6 @@ static inline int dev_parse_header(const struct sk_buff *skb, return dev->header_ops->parse(skb, haddr); } -static inline int dev_rebuild_header(struct sk_buff *skb) -{ - const struct net_device *dev = skb->dev; - - if (!dev->header_ops || !dev->header_ops->rebuild) - return 0; - return dev->header_ops->rebuild(skb); -} - typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) -- cgit v1.2.3 From 0189197f441602acdca3f97750d392a895b778fd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 3 Mar 2015 19:10:47 -0600 Subject: mpls: Basic routing support This change adds a new Kconfig option MPLS_ROUTING. The core of this change is the code to look at an mpls packet received from another machine. Look that packet up in a routing table and forward the packet on. Support of MPLS over ATM is not considered or attempted here. This implemntation follows RFC3032 and implements the MPLS shim header that can pass over essentially any network. What RFC3021 refers to as the as the Incoming Label Map (ILM) I call net->mpls.platform_label[]. What RFC3031 refers to as the Next Label Hop Forwarding Entry (NHLFE) I call mpls_route. Though calling it the label fordwarding information base (lfib) might also be valid. Further the implemntation forwards packets as described in RFC3032. There is no need and given the original motivation for MPLS a strong discincentive to have a flexible label forwarding path. In essence the logic is the topmost label is read, looked up, removed, and replaced by 0 or more new lables and the sent out the specified interface to it's next hop. Quite a few optional features are not implemented here. Among them are generation of ICMP errors when the TTL is exceeded or the packet is larger than the next hop MTU (those conditions are detected and the packets are dropped instead of generating an icmp error). The traffic class field is always set to 0. The implementation focuses on IP over MPLS and does not handle egress of other kinds of protocols. Instead of implementing coordination with the neighbour table and sorting out how to input next hops in a different address family (for which there is value). I was lazy and implemented a next hop mac address instead. The code is simpler and there are flavor of MPLS such as MPLS-TP where neither an IPv4 nor an IPv6 next hop is appropriate so a next hop by mac address would need to be implemented at some point. Two new definitions AF_MPLS and PF_MPLS are exposed to userspace. Decoding the mpls header must be done by first byeswapping a 32bit bit endian word into the local cpu endian and then bit shifting to extract the pieces. There is no C bit-field that can represent a wire format mpls header on a little endian machine as the low bits of the 20bit label wind up in the wrong half of third byte. Therefore internally everything is deal with in cpu native byte order except when writing to and reading from a packet. For management simplicity if a label is configured to forward out an interface that is down the packet is dropped early. Similarly if an network interface is removed rt_dev is updated to NULL (so no reference is preserved) and any packets for that label are dropped. Keeping the label entries in the kernel allows the kernel label table to function as the definitive source of which labels are allocated and which are not. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 5c19cba34dce..fab4d0ddf4ed 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -181,6 +181,7 @@ struct ucred { #define AF_WANPIPE 25 /* Wanpipe API Sockets */ #define AF_LLC 26 /* Linux LLC */ #define AF_IB 27 /* Native InfiniBand address */ +#define AF_MPLS 28 /* MPLS */ #define AF_CAN 29 /* Controller Area Network */ #define AF_TIPC 30 /* TIPC sockets */ #define AF_BLUETOOTH 31 /* Bluetooth sockets */ @@ -226,6 +227,7 @@ struct ucred { #define PF_WANPIPE AF_WANPIPE #define PF_LLC AF_LLC #define PF_IB AF_IB +#define PF_MPLS AF_MPLS #define PF_CAN AF_CAN #define PF_TIPC AF_TIPC #define PF_BLUETOOTH AF_BLUETOOTH -- cgit v1.2.3 From c32ec2a11321978c34296d9a6bd5b0c31a2eb182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 4 Mar 2015 12:14:41 +0100 Subject: bcma: make bcma_host_pci_(up|down) calls safe for every config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were providing declarations but actual code was compiled only with CONFIG_BCMA_HOST_PCI set. This could result in: ERROR: "bcma_host_pci_down" [drivers/net/wireless/brcm80211/brcmsmac/brcmsmac.ko] undefined! ERROR: "bcma_host_pci_up" [drivers/net/wireless/brcm80211/brcmsmac/brcmsmac.ko] undefined! ERROR: "bcma_host_pci_down" [drivers/net/wireless/b43/b43.ko] undefined! ERROR: "bcma_host_pci_up" [drivers/net/wireless/b43/b43.ko] undefined! Reported-by: Arnd Bergmann Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 037620b3f113..44057b45ed32 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -434,8 +434,17 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, return bcma_find_core_unit(bus, coreid, 0); } +#ifdef CONFIG_BCMA_HOST_PCI extern void bcma_host_pci_up(struct bcma_bus *bus); extern void bcma_host_pci_down(struct bcma_bus *bus); +#else +static inline void bcma_host_pci_up(struct bcma_bus *bus) +{ +} +static inline void bcma_host_pci_down(struct bcma_bus *bus) +{ +} +#endif extern bool bcma_core_is_enabled(struct bcma_device *core); extern void bcma_core_disable(struct bcma_device *core, u32 flags); -- cgit v1.2.3 From 0a4e699a41f767dff76ca7dc1019b9ca6de3eb42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 4 Mar 2015 14:24:52 +0100 Subject: bcma: move internal function declarations to private header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not exported nor used anywhere, so there is no reason to put them in public headers. Also drop unused bcma_chipco_(suspend|resume). Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_chipcommon.h | 11 ----------- include/linux/bcma/bcma_driver_gmac_cmn.h | 6 ------ include/linux/bcma/bcma_driver_mips.h | 15 --------------- include/linux/bcma/bcma_driver_pci.h | 2 -- include/linux/bcma/bcma_driver_pcie2.h | 2 -- 5 files changed, 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index db6fa217f98b..6cceedf65ca2 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -663,14 +663,6 @@ struct bcma_drv_cc_b { #define bcma_cc_maskset32(cc, offset, mask, set) \ bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set)) -extern void bcma_core_chipcommon_init(struct bcma_drv_cc *cc); -extern void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc); - -extern void bcma_chipco_suspend(struct bcma_drv_cc *cc); -extern void bcma_chipco_resume(struct bcma_drv_cc *cc); - -void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable); - extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks); extern u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc); @@ -690,9 +682,6 @@ u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value); u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value); /* PMU support */ -extern void bcma_pmu_init(struct bcma_drv_cc *cc); -extern void bcma_pmu_early_init(struct bcma_drv_cc *cc); - extern void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value); extern void bcma_chipco_pll_maskset(struct bcma_drv_cc *cc, u32 offset, diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h index 4dd1f33e36a2..4354d4ea6713 100644 --- a/include/linux/bcma/bcma_driver_gmac_cmn.h +++ b/include/linux/bcma/bcma_driver_gmac_cmn.h @@ -91,10 +91,4 @@ struct bcma_drv_gmac_cmn { #define gmac_cmn_write16(gc, offset, val) bcma_write16((gc)->core, offset, val) #define gmac_cmn_write32(gc, offset, val) bcma_write32((gc)->core, offset, val) -#ifdef CONFIG_BCMA_DRIVER_GMAC_CMN -extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); -#else -static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { } -#endif - #endif /* LINUX_BCMA_DRIVER_GMAC_CMN_H_ */ diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h index 0b3b32aeeb8a..8eea7f9e33b4 100644 --- a/include/linux/bcma/bcma_driver_mips.h +++ b/include/linux/bcma/bcma_driver_mips.h @@ -39,21 +39,6 @@ struct bcma_drv_mips { u8 early_setup_done:1; }; -#ifdef CONFIG_BCMA_DRIVER_MIPS -extern void bcma_core_mips_init(struct bcma_drv_mips *mcore); -extern void bcma_core_mips_early_init(struct bcma_drv_mips *mcore); - -extern unsigned int bcma_core_mips_irq(struct bcma_device *dev); -#else -static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) { } -static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { } - -static inline unsigned int bcma_core_mips_irq(struct bcma_device *dev) -{ - return 0; -} -#endif - extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore); #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */ diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 6b8bca67851f..8e90004fdfd7 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -238,8 +238,6 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); -extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus, struct bcma_device *core, bool enable); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); diff --git a/include/linux/bcma/bcma_driver_pcie2.h b/include/linux/bcma/bcma_driver_pcie2.h index d8c43294c527..31e6d17ab798 100644 --- a/include/linux/bcma/bcma_driver_pcie2.h +++ b/include/linux/bcma/bcma_driver_pcie2.h @@ -155,6 +155,4 @@ struct bcma_drv_pcie2 { #define pcie2_set32(pcie2, offset, set) bcma_set32((pcie2)->core, offset, set) #define pcie2_mask32(pcie2, offset, mask) bcma_mask32((pcie2)->core, offset, mask) -void bcma_core_pcie2_init(struct bcma_drv_pcie2 *pcie2); - #endif /* LINUX_BCMA_DRIVER_PCIE2_H_ */ -- cgit v1.2.3 From 842a9ae08a25671db3d4f689eed68b4d64be15b5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 4 Mar 2015 12:54:21 +0200 Subject: bridge: Extend Proxy ARP design to allow optional rules for Wi-Fi This extends the design in commit 958501163ddd ("bridge: Add support for IEEE 802.11 Proxy ARP") with optional set of rules that are needed to meet the IEEE 802.11 and Hotspot 2.0 requirements for ProxyARP. The previously added BR_PROXYARP behavior is left as-is and a new BR_PROXYARP_WIFI alternative is added so that this behavior can be configured from user space when required. In addition, this enables proxyarp functionality for unicast ARP requests for both BR_PROXYARP and BR_PROXYARP_WIFI since it is possible to use unicast as well as broadcast for these frames. The key differences in functionality: BR_PROXYARP: - uses the flag on the bridge port on which the request frame was received to determine whether to reply - block bridge port flooding completely on ports that enable proxy ARP BR_PROXYARP_WIFI: - uses the flag on the bridge port to which the target device of the request belongs - block bridge port flooding selectively based on whether the proxyarp functionality replied Signed-off-by: Jouni Malinen Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index a57bca2ea97e..dad8b00beed2 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -44,6 +44,7 @@ struct br_ip_list { #define BR_PROMISC BIT(7) #define BR_PROXYARP BIT(8) #define BR_LEARNING_SYNC BIT(9) +#define BR_PROXYARP_WIFI BIT(10) extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); -- cgit v1.2.3 From 4586f1bb911ce219a4bc1c2a9d6eee2e058b2b51 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Thu, 5 Mar 2015 21:21:14 -0800 Subject: netdevice: add IPv4 fib add/del ops Add two new ndo ops for IPv4 fib offload support, add and del. Add uses modifiy semantics if fib entry already offloaded. Drivers implementing the new ndo ops will return err<0 if programming device fails, for example if device's tables are full. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 625c8d71511b..45413784a3b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -768,6 +768,8 @@ struct netdev_phys_item_id { typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); +struct fib_info; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1031,6 +1033,14 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); * Called to notify switch device port of bridge port STP * state change. + * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 tb_id); + * Called to add/modify IPv4 route to switch device. + * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst, + * int dst_len, struct fib_info *fi, + * u8 tos, u8 type, u32 tb_id); + * Called to delete IPv4 route from switch device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1192,6 +1202,18 @@ struct net_device_ops { struct netdev_phys_item_id *psid); int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); + int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, + __be32 dst, + int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); + int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, + __be32 dst, + int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); #endif }; -- cgit v1.2.3 From d237baa1cbb3a2335357484c1d63a810a01947e2 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Thu, 5 Mar 2015 20:16:12 +0200 Subject: net/mlx4_core: Add basic elements for QCN Add device capability, firmware command opcode and etc prior elements needed for QCN suppprt. Disable SRIOV VF view/access for QCN is disabled. While here, remove a redundant offset definition into the QUERY_DEV_CAP mailbox. Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 13 +++++++++++++ include/linux/mlx4/device.h | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 7b6d4e9ff603..7299e9548906 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -163,6 +163,9 @@ enum { MLX4_QP_FLOW_STEERING_ATTACH = 0x65, MLX4_QP_FLOW_STEERING_DETACH = 0x66, MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64, + + /* Update and read QCN parameters */ + MLX4_CMD_CONGESTION_CTRL_OPCODE = 0x68, }; enum { @@ -233,6 +236,16 @@ struct mlx4_config_dev_params { u8 rx_csum_flags_port_2; }; +enum mlx4_en_congestion_control_algorithm { + MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT = 0, +}; + +enum mlx4_en_congestion_control_opmod { + MLX4_CONGESTION_CONTROL_GET_PARAMS, + MLX4_CONGESTION_CONTROL_GET_STATISTICS, + MLX4_CONGESTION_CONTROL_SET_PARAMS = 4, +}; + struct mlx4_dev; struct mlx4_cmd_mailbox { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e4ebff7e9d02..1cc54822b931 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -203,7 +203,8 @@ enum { MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19, MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, - MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21 + MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, + MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, }; enum { -- cgit v1.2.3 From 8bd63cf1a426e69bf4f611b08978f721e46c194f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Mar 2015 00:52:33 +0100 Subject: bridge: move mac header copying into br_netfilter The mac header only has to be copied back into the skb for fragments generated by ip_fragment(), which only happens for bridge forwarded packets with nf-call-iptables=1 && active nf_defrag. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index c755e4971fa3..332ef8ab37e9 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -44,36 +44,6 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb) skb->protocol = htons(ETH_P_PPP_SES); } -/* Fill in the header for fragmented IP packets handled by - * the IPv4 connection tracking code. - * - * Only used in br_forward.c - */ -static inline int nf_bridge_copy_header(struct sk_buff *skb) -{ - int err; - unsigned int header_size; - - nf_bridge_update_protocol(skb); - header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow_head(skb, header_size); - if (err) - return err; - - skb_copy_to_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); - __skb_push(skb, nf_bridge_encap_header_len(skb)); - return 0; -} - -static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) -{ - if (skb->nf_bridge && - skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT)) - return nf_bridge_copy_header(skb); - return 0; -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) @@ -119,7 +89,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) } #else -#define nf_bridge_maybe_copy_header(skb) (0) #define nf_bridge_pad(skb) (0) #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ -- cgit v1.2.3 From 4a9d2f200862683d6680d5565f30c126625afe65 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Mar 2015 00:52:34 +0100 Subject: netfilter: bridge: move nf_bridge_update_protocol to where its used no need to keep it in a header file. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 332ef8ab37e9..dd580a9a1add 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -36,14 +36,6 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) } } -static inline void nf_bridge_update_protocol(struct sk_buff *skb) -{ - if (skb->nf_bridge->mask & BRNF_8021Q) - skb->protocol = htons(ETH_P_8021Q); - else if (skb->nf_bridge->mask & BRNF_PPPoE) - skb->protocol = htons(ETH_P_PPP_SES); -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) -- cgit v1.2.3 From e5de75bf88858f5b3ab11e2504b86ec059f03102 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 9 Mar 2015 12:30:12 +0100 Subject: netfilter: bridge: move DNAT helper to br_netfilter Only one caller, there is no need to keep this in a header. Move it to br_netfilter.c where this belongs to. Based on patch from Florian Westphal. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index dd580a9a1add..bb39113ea596 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -44,18 +44,6 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) } int br_handle_frame_finish(struct sk_buff *skb); -/* Only used in br_device.c */ -static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) -{ - struct nf_bridge_info *nf_bridge = skb->nf_bridge; - - skb_pull(skb, ETH_HLEN); - nf_bridge->mask ^= BRNF_BRIDGED_DNAT; - skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); - skb->dev = nf_bridge->physindev; - return br_handle_frame_finish(skb); -} /* This is called by the IP fragmenting code and it ensures there is * enough room for the encapsulating header (if there is one). */ -- cgit v1.2.3 From aa836df958886e57ff0d43fb3d79d1af4aec0cc8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 9 Mar 2015 14:31:20 -0700 Subject: net: core: add of_find_net_device_by_node() Add a helper function which allows getting the struct net_device pointer associated with a given struct device_node pointer. This is useful for instance for DSA Ethernet devices not backed by a platform_device, but a PCI device. Since we need to access net_class which is not accessible outside of net/core/net-sysfs.c, this helper function is also added here and gated with CONFIG_OF_NET. Network devices initialized with SET_NETDEV_DEV() are also taken into account by checking for dev->parent first and then falling back to checking the device pointer within struct net_device. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/of_net.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 34597c8c1a4c..9cd72aab76fe 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -9,8 +9,11 @@ #ifdef CONFIG_OF_NET #include + +struct net_device; extern int of_get_phy_mode(struct device_node *np); extern const void *of_get_mac_address(struct device_node *np); +extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else static inline int of_get_phy_mode(struct device_node *np) { @@ -21,6 +24,11 @@ static inline const void *of_get_mac_address(struct device_node *np) { return NULL; } + +static inline struct net_device *of_find_net_device_by_node(struct device_node *np) +{ + return NULL; +} #endif #endif /* __LINUX_OF_NET_H */ -- cgit v1.2.3 From f8f2147150de303e814c0452075d467734d3544b Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Mon, 9 Mar 2015 13:59:09 -0700 Subject: switchdev: add netlink flags to IPv4 FIB add op Pass in the netlink flags (NLM_F_*) into switchdev driver for IPv4 FIB add op to allow driver to 1) optimize hardware updates, 2) handle ip route prepend and append commands correctly. Suggested-by: Jamal Hadi Salim Suggested-by: Roopa Prabhu Signed-off-by: Scott Feldman Reviewed-by: Simon Horman Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45413784a3b1..1354ae83efc8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1035,7 +1035,7 @@ struct fib_info; * state change. * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst, * int dst_len, struct fib_info *fi, - * u8 tos, u8 type, u32 tb_id); + * u8 tos, u8 type, u32 nlflags, u32 tb_id); * Called to add/modify IPv4 route to switch device. * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst, * int dst_len, struct fib_info *fi, @@ -1207,6 +1207,7 @@ struct net_device_ops { int dst_len, struct fib_info *fi, u8 tos, u8 type, + u32 nlflags, u32 tb_id); int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, __be32 dst, -- cgit v1.2.3 From 59e33c2b021322db92ca27c4d9958e57630443b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 9 Mar 2015 15:44:13 -0700 Subject: net: phy: bcm7xxx: add alternate id for 7439 BCM7439 has an alternate PHY OUI: 0xae025080 which is to be found in some variants of this chip. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 7ccd928cc1f2..cab606617522 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -19,6 +19,7 @@ #define PHY_ID_BCM7425 0x03625e60 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7439 0x600d8480 +#define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 #define PHY_BCM_OUI_MASK 0xfffffc00 -- cgit v1.2.3 From 491da2a477077357c8206a601559e2ea58f224db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Mar 2015 07:15:52 -0700 Subject: net: constify sock_diag_check_cookie() sock_diag_check_cookie() second parameter is constant Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 46cca4c06848..b5ad7d35a636 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -19,7 +19,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -int sock_diag_check_cookie(void *sk, __u32 *cookie); +int sock_diag_check_cookie(void *sk, const __u32 *cookie); void sock_diag_save_cookie(void *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); -- cgit v1.2.3 From 34160ea3f9c96b5ae71a11459f9b9f6c298b8930 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Mar 2015 07:15:54 -0700 Subject: inet_diag: add const to inet_diag_req_v2 diag dumpers should not modify the request. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 46da02410a09..ac48b10c9395 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -11,33 +11,34 @@ struct sk_buff; struct netlink_callback; struct inet_diag_handler { - void (*dump)(struct sk_buff *skb, - struct netlink_callback *cb, - struct inet_diag_req_v2 *r, - struct nlattr *bc); - - int (*dump_one)(struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req); - - void (*idiag_get_info)(struct sock *sk, - struct inet_diag_msg *r, - void *info); - __u16 idiag_type; + void (*dump)(struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + struct nlattr *bc); + + int (*dump_one)(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req); + + void (*idiag_get_info)(struct sock *sk, + struct inet_diag_msg *r, + void *info); + __u16 idiag_type; }; struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh); + struct sk_buff *skb, const struct inet_diag_req_v2 *req, + struct user_namespace *user_ns, + u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, - struct netlink_callback *cb, struct inet_diag_req_v2 *r, - struct nlattr *bc); + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + struct nlattr *bc); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, const struct nlmsghdr *nlh, - struct inet_diag_req_v2 *req); + struct sk_buff *in_skb, const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); -- cgit v1.2.3 From 988dfbd795cf08b00576c1ced4210281b2bccffc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Mar 2015 09:27:55 +1100 Subject: rhashtable: Move hash_rnd into bucket_table Currently hash_rnd is a parameter that users can set. However, no existing users set this parameter. It is also something that people are unlikely to want to set directly since it's just a random number. In preparation for allowing the reseeding/rehashing of rhashtable, this patch moves hash_rnd into bucket_table so that it's now an internal state rather than a parameter. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d438eeb08bff..5ef8ea551556 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -49,12 +49,14 @@ struct rhash_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @buckets: size * hash buckets */ struct bucket_table { size_t size; + u32 hash_rnd; unsigned int locks_mask; spinlock_t *locks; @@ -72,7 +74,6 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed - * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking * @nulls_base: Base value to generate nulls marker @@ -85,7 +86,6 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; - u32 hash_rnd; size_t max_shift; size_t min_shift; u32 nulls_base; -- cgit v1.2.3 From 33d6737761ea425d43f3b6e4561573a4020982f2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Mar 2015 16:57:11 -0700 Subject: of: mdio: export of_mdio_parse_addr Export of_mdio_parse_addr() which allows parsing a given Ethernet PHY node MDIO address, verify it is within the allowed range, and return its value. This is going to be useful for the DSA code which needs to deal with multiple layers of MDIO buses. Signed-off-by: Florian Fainelli Acked-by: Rob Herring Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index d449018d0726..8f2237eb3485 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -24,6 +24,7 @@ struct phy_device *of_phy_attach(struct net_device *dev, phy_interface_t iface); extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); +extern int of_mdio_parse_addr(struct device *dev, const struct device_node *np); #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) @@ -60,6 +61,12 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) { return NULL; } + +static inline int of_mdio_parse_addr(struct device *dev, + const struct device_node *np) +{ + return -ENOSYS; +} #endif /* CONFIG_OF */ #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) -- cgit v1.2.3 From 33cf7c90fe2f97afb1cadaa0cfb782cb9d1b9ee2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2015 18:53:14 -0700 Subject: net: add real socket cookies A long standing problem in netlink socket dumps is the use of kernel socket addresses as cookies. 1) It is a security concern. 2) Sockets can be reused quite quickly, so there is no guarantee a cookie is used once and identify a flow. 3) request sock, establish sock, and timewait socks for a given flow have different cookies. Part of our effort to bring better TCP statistics requires to switch to a different allocator. In this patch, I chose to use a per network namespace 64bit generator, and to use it only in the case a socket needs to be dumped to netlink. (This might be refined later if needed) Note that I tried to carry cookies from request sock, to establish sock, then timewait sockets. Signed-off-by: Eric Dumazet Cc: Eric Salo Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index b5ad7d35a636..083ac388098e 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -19,8 +19,8 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -int sock_diag_check_cookie(void *sk, const __u32 *cookie); -void sock_diag_save_cookie(void *sk, __u32 *cookie); +int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); +void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, -- cgit v1.2.3 From efd7ef1c1929d7a0329d4349252863c04d6f1729 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 Mar 2015 23:04:08 -0500 Subject: net: Kill hold_net release_net hold_net and release_net were an idea that turned out to be useless. The code has been disabled since 2008. Kill the code it is long past due. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1354ae83efc8..cede40d9cac9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1864,8 +1864,7 @@ static inline void dev_net_set(struct net_device *dev, struct net *net) { #ifdef CONFIG_NET_NS - release_net(dev->nd_net); - dev->nd_net = hold_net(net); + dev->nd_net = net; #endif } -- cgit v1.2.3 From 0c5c9fb55106333e773de8c9dd321fa8240caeb3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 Mar 2015 23:06:44 -0500 Subject: net: Introduce possible_net_t Having to say > #ifdef CONFIG_NET_NS > struct net *net; > #endif in structures is a little bit wordy and a little bit error prone. Instead it is possible to say: > typedef struct { > #ifdef CONFIG_NET_NS > struct net *net; > #endif > } possible_net_t; And then in a header say: > possible_net_t net; Which is cleaner and easier to use and easier to test, as the possible_net_t is always there no matter what the compile options. Further this allows read_pnet and write_pnet to be functions in all cases which is better at catching typos. This change adds possible_net_t, updates the definitions of read_pnet and write_pnet, updates optional struct net * variables that write_pnet uses on to have the type possible_net_t, and finally fixes up the b0rked users of read_pnet and write_pnet. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cede40d9cac9..ddab1a2a07a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1721,9 +1721,7 @@ struct net_device { struct netpoll_info __rcu *npinfo; #endif -#ifdef CONFIG_NET_NS - struct net *nd_net; -#endif + possible_net_t nd_net; /* mid-layer private */ union { @@ -1863,9 +1861,7 @@ struct net *dev_net(const struct net_device *dev) static inline void dev_net_set(struct net_device *dev, struct net *net) { -#ifdef CONFIG_NET_NS - dev->nd_net = net; -#endif + write_pnet(&dev->nd_net, net); } static inline bool netdev_uses_dsa(struct net_device *dev) -- cgit v1.2.3 From 80f1d68ccba70b1060c9c7360ca83da430f66bed Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 12 Mar 2015 17:21:42 +0100 Subject: ebpf: verifier: check that call reg with ARG_ANYTHING is initialized I noticed that a helper function with argument type ARG_ANYTHING does not need to have an initialized value (register). This can worst case lead to unintented stack memory leakage in future helper functions if they are not carefully designed, or unintended application behaviour in case the application developer was not careful enough to match a correct helper function signature in the API. The underlying issue is that ARG_ANYTHING should actually be split into two different semantics: 1) ARG_DONTCARE for function arguments that the helper function does not care about (in other words: the default for unused function arguments), and 2) ARG_ANYTHING that is an argument actually being used by a helper function and *guaranteed* to be an initialized register. The current risk is low: ARG_ANYTHING is only used for the 'flags' argument (r4) in bpf_map_update_elem() that internally does strict checking. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a884f5a2c503..80f2e0fc3d02 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -44,7 +44,7 @@ struct bpf_map_type_list { /* function argument constraints */ enum bpf_arg_type { - ARG_ANYTHING = 0, /* any argument is ok */ + ARG_DONTCARE = 0, /* unused argument in helper function */ /* the following constraints used to prototype * bpf_map_lookup/update/delete_elem() functions @@ -58,6 +58,8 @@ enum bpf_arg_type { */ ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + + ARG_ANYTHING, /* any (initialized) argument is ok */ }; /* type of values returned from helper functions */ -- cgit v1.2.3 From a5b6846f9e1a080493210013385c28faecee36f0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 12 Mar 2015 15:28:40 +0100 Subject: rhashtable: kill ht->shift atomic operations Commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker queue") changed ht->shift to be atomic, which is actually unnecessary. Instead of leaving the current shift in the core rhashtable structure, it can be cached inside the individual bucket tables. There, it will only be initialized once during a new table allocation in the shrink/expansion slow path, and from then onward it stays immutable for the rest of the bucket table liftime. That allows shift to be non-atomic. The patch also moves hash_rnd management into the table setup. The rhashtable structure now consumes 3 instead of 4 cachelines. Signed-off-by: Daniel Borkmann Cc: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5ef8ea551556..c93ff8ac474a 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -50,6 +50,7 @@ struct rhash_head { * struct bucket_table - Table of hash buckets * @size: Number of hash buckets * @hash_rnd: Random seed to fold into hash + * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @buckets: size * hash buckets @@ -57,6 +58,7 @@ struct rhash_head { struct bucket_table { size_t size; u32 hash_rnd; + u32 shift; unsigned int locks_mask; spinlock_t *locks; @@ -99,7 +101,6 @@ struct rhashtable_params { * @tbl: Bucket table * @future_tbl: Table under construction during expansion/shrinking * @nelems: Number of elements in table - * @shift: Current size (1 << shift) * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -110,12 +111,11 @@ struct rhashtable { struct bucket_table __rcu *tbl; struct bucket_table __rcu *future_tbl; atomic_t nelems; - atomic_t shift; + bool being_destroyed; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; struct list_head walkers; - bool being_destroyed; }; /** -- cgit v1.2.3 From 9c51026509d7fd11d84e0035008e1a9768960f2b Mon Sep 17 00:00:00 2001 From: Syed Asifful Dayyan Date: Fri, 6 Mar 2015 18:40:42 +0100 Subject: brcmfmac: Add support for BCM4345 SDIO chipset. These changes add support for BCM4345 SDIO chipset. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Arend Van Spriel Reviewed-by: Hante Meuleman Reviewed-by: Daniel (Deognyoun) Kim Signed-off-by: Syed Asifful Dayyan Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 996807963716..91397858dc95 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -33,6 +33,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 +#define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_VENDOR_ID_INTEL 0x0089 -- cgit v1.2.3 From 702131e2a393b45174be326f1dbe20b658b4f157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 5 Mar 2015 18:25:10 +0100 Subject: bcma: move PCI IRQ control function to host specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function isn't really related to any bus core. It touches PCI device config registers only, so move it to the (PCI) host file. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 9 +++++++++ include/linux/bcma/bcma_driver_pci.h | 2 -- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 44057b45ed32..e34f906647d3 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -437,6 +437,8 @@ static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, #ifdef CONFIG_BCMA_HOST_PCI extern void bcma_host_pci_up(struct bcma_bus *bus); extern void bcma_host_pci_down(struct bcma_bus *bus); +extern int bcma_host_pci_irq_ctl(struct bcma_bus *bus, + struct bcma_device *core, bool enable); #else static inline void bcma_host_pci_up(struct bcma_bus *bus) { @@ -444,6 +446,13 @@ static inline void bcma_host_pci_up(struct bcma_bus *bus) static inline void bcma_host_pci_down(struct bcma_bus *bus) { } +static inline int bcma_host_pci_irq_ctl(struct bcma_bus *bus, + struct bcma_device *core, bool enable) +{ + if (bus->hosttype == BCMA_HOSTTYPE_PCI) + return -ENOTSUPP; + return 0; +} #endif extern bool bcma_core_is_enabled(struct bcma_device *core); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 8e90004fdfd7..3a468687c170 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -238,8 +238,6 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern int bcma_core_pci_irq_ctl(struct bcma_bus *bus, - struct bcma_device *core, bool enable); extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); -- cgit v1.2.3 From 982a40f5c0bb03368989a6b1ae833b474854e931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 5 Mar 2015 18:25:11 +0100 Subject: bcma: allow disabling (not building) PCI driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It isn't required for bcma bus on SoCs, so provide some empty functions and allow disabling it. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 3a468687c170..5ba6918ca20b 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -238,7 +238,13 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) +#ifdef CONFIG_BCMA_DRIVER_PCI extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up); +#else +static inline void bcma_core_pci_power_save(struct bcma_bus *bus, bool up) +{ +} +#endif extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev); extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev); -- cgit v1.2.3 From 3f3c4bb5ec7c645d1151e1e8d6e56c71a050cf85 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 4 Mar 2015 21:19:59 +0100 Subject: mac802154: correct max sifs size handling This patch fix the max sifs size correction when the IEEE802154_HW_TX_OMIT_CKSUM flag is set. With this flag the sk_buff doesn't contain the CRC, because the transceiver will add the CRC while transmit. Also add some defines for the max sifs frame size value and frame check sequence according to 802.15.4 standard. Signed-off-by: Alexander Aring Acked-by: Marc Kleine-Budde Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 40b0ab953937..8872ca103d06 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -30,6 +30,7 @@ #define IEEE802154_MTU 127 #define IEEE802154_ACK_PSDU_LEN 5 #define IEEE802154_MIN_PSDU_LEN 9 +#define IEEE802154_FCS_LEN 2 #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff @@ -39,6 +40,7 @@ #define IEEE802154_LIFS_PERIOD 40 #define IEEE802154_SIFS_PERIOD 12 +#define IEEE802154_MAX_SIFS_FRAME_SIZE 18 #define IEEE802154_MAX_CHANNEL 26 #define IEEE802154_MAX_PAGE 31 -- cgit v1.2.3 From eddee5ba34eb6c9890ef106f19ead2b370e5342f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:20 +1100 Subject: rhashtable: Fix walker behaviour during rehash Previously whenever the walker encountered a resize it simply snaps back to the beginning and starts again. However, this only works if the rehash started and completed while the walker was idle. If the walker attempts to restart while the rehash is still ongoing, we may miss objects that we shouldn't have. This patch fixes this by making the walker walk the old table followed by the new table just like all other readers. If a rehash is detected we will still signal our caller of the fact so they can prepare for duplicates but we will simply continue the walk onto the new table after the old one is finished either by us or by the rehasher. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c93ff8ac474a..4192682c1d5c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -53,6 +53,7 @@ struct rhash_head { * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets + * @walkers: List of active walkers * @buckets: size * hash buckets */ struct bucket_table { @@ -61,6 +62,7 @@ struct bucket_table { u32 shift; unsigned int locks_mask; spinlock_t *locks; + struct list_head walkers; struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; @@ -104,7 +106,6 @@ struct rhashtable_params { * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping - * @walkers: List of active walkers * @being_destroyed: True if table is set up for destruction */ struct rhashtable { @@ -115,17 +116,16 @@ struct rhashtable { struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; - struct list_head walkers; }; /** * struct rhashtable_walker - Hash table walker * @list: List entry on list of walkers - * @resize: Resize event occured + * @tbl: The table that we were walking over */ struct rhashtable_walker { struct list_head list; - bool resize; + struct bucket_table *tbl; }; /** -- cgit v1.2.3 From 9d901bc05153bbf33b5da2cd6266865e531f0545 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:23 +1100 Subject: rhashtable: Free bucket tables asynchronously after rehash There is in fact no need to wait for an RCU grace period in the rehash function, since all insertions are guaranteed to go into the new table through spin locks. This patch uses call_rcu to free the old/rehashed table at our leisure. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 4192682c1d5c..a0abddd226b3 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -54,6 +54,7 @@ struct rhash_head { * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers + * @rcu: RCU structure for freeing the table * @buckets: size * hash buckets */ struct bucket_table { @@ -63,6 +64,7 @@ struct bucket_table { unsigned int locks_mask; spinlock_t *locks; struct list_head walkers; + struct rcu_head rcu; struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -- cgit v1.2.3 From 63d512d0cffcae40505d9448abd509972465e846 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:24 +1100 Subject: rhashtable: Add rehash counter to bucket_table This patch adds a rehash counter to bucket_table to indicate the last bucket that has been rehashed. This serves two purposes: 1. Any bucket that has been rehashed can never gain a new object. 2. If the rehash counter reaches the size of the table, the table will forever remain empty. This patch also downsizes bucket_table->size to an unsigned int since we do not support sizes greater than 32 bits yet. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a0abddd226b3..ed7562ad4ca0 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -49,6 +49,7 @@ struct rhash_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] @@ -58,7 +59,8 @@ struct rhash_head { * @buckets: size * hash buckets */ struct bucket_table { - size_t size; + unsigned int size; + unsigned int rehash; u32 hash_rnd; u32 shift; unsigned int locks_mask; -- cgit v1.2.3 From c4db8848af6af92f90462258603be844baeab44d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Mar 2015 13:57:25 +1100 Subject: rhashtable: Move future_tbl into struct bucket_table This patch moves future_tbl to open up the possibility of having multiple rehashes on the same table. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ed7562ad4ca0..1695378b3c5b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -56,6 +56,7 @@ struct rhash_head { * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers * @rcu: RCU structure for freeing the table + * @future_tbl: Table under construction during rehashing * @buckets: size * hash buckets */ struct bucket_table { @@ -68,6 +69,8 @@ struct bucket_table { struct list_head walkers; struct rcu_head rcu; + struct bucket_table __rcu *future_tbl; + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; @@ -105,7 +108,6 @@ struct rhashtable_params { /** * struct rhashtable - Hash table handle * @tbl: Bucket table - * @future_tbl: Table under construction during expansion/shrinking * @nelems: Number of elements in table * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously @@ -114,7 +116,6 @@ struct rhashtable_params { */ struct rhashtable { struct bucket_table __rcu *tbl; - struct bucket_table __rcu *future_tbl; atomic_t nelems; bool being_destroyed; struct rhashtable_params p; -- cgit v1.2.3 From 03e69b508b6f7c51743055c9f61d1dfeadf4b635 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 14 Mar 2015 02:27:16 +0100 Subject: ebpf: add prandom helper for packet sampling This work is similar to commit 4cd3675ebf74 ("filter: added BPF random opcode") and adds a possibility for packet sampling in eBPF. Currently, this is only possible in classic BPF and useful to combine sampling with f.e. packet sockets, possible also with tc. Example function proto-type looks like: u32 (*prandom_u32)(void) = (void *)BPF_FUNC_get_prandom_u32; Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 80f2e0fc3d02..50bf95e29a96 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -154,4 +154,6 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_get_prandom_u32_proto; + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From c04167ce2ca0ecaeaafef006cb0d65cf01b68e42 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 14 Mar 2015 02:27:17 +0100 Subject: ebpf: add helper for obtaining current processor id This patch adds the possibility to obtain raw_smp_processor_id() in eBPF. Currently, this is only possible in classic BPF where commit da2033c28226 ("filter: add SKF_AD_RXHASH and SKF_AD_CPU") has added facilities for this. Perhaps most importantly, this would also allow us to track per CPU statistics with eBPF maps, or to implement a poor-man's per CPU data structure through eBPF maps. Example function proto-type looks like: u32 (*smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 50bf95e29a96..30bfd331882a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -155,5 +155,6 @@ extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; +extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From 9bac3d6d548e5cc925570b263f35b70a00a00ffd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 13 Mar 2015 11:57:42 -0700 Subject: bpf: allow extended BPF programs access skb fields introduce user accessible mirror of in-kernel 'struct sk_buff': struct __sk_buff { __u32 len; __u32 pkt_type; __u32 mark; __u32 queue_mapping; }; bpf programs can do: int bpf_prog(struct __sk_buff *skb) { __u32 var = skb->pkt_type; which will be compiled to bpf assembler as: dst_reg = *(u32 *)(src_reg + 4) // 4 == offsetof(struct __sk_buff, pkt_type) bpf verifier will check validity of access and will convert it to: dst_reg = *(u8 *)(src_reg + offsetof(struct sk_buff, __pkt_type_offset)) dst_reg &= 7 since skb->pkt_type is a bitfield. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30bfd331882a..280a315de8d6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -103,6 +103,9 @@ struct bpf_verifier_ops { * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type); + + u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off, + struct bpf_insn *insn); }; struct bpf_prog_type_list { @@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) { -- cgit v1.2.3 From 4170604feec780d00e7511c24fa0f6e5c2e4ed75 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 15 Mar 2015 21:07:14 -0700 Subject: switchdev: add swdev ops As discussed at netconf, introduce swdev_ops as first step to move switchdev ops from ndo to swdev. This will keep switchdev from cluttering up ndo ops space. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ddab1a2a07a0..9e8a2a933c68 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1577,6 +1577,9 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; const struct forwarding_accel_ops *fwd_ops; +#ifdef CONFIG_NET_SWITCHDEV + const struct swdev_ops *swdev_ops; +#endif const struct header_ops *header_ops; -- cgit v1.2.3 From 812a1c3ff3ee9d5100e0e71edb06681014e84a9b Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sun, 15 Mar 2015 21:07:16 -0700 Subject: netdev: remove ndo ops for switchdev Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e8a2a933c68..dd1d069758be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -768,8 +768,6 @@ struct netdev_phys_item_id { typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); -struct fib_info; - /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1024,23 +1022,6 @@ struct fib_info; * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. - * - * int (*ndo_switch_parent_id_get)(struct net_device *dev, - * struct netdev_phys_item_id *psid); - * Called to get an ID of the switch chip this port is part of. - * If driver implements this, it indicates that it represents a port - * of a switch chip. - * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state); - * Called to notify switch device port of bridge port STP - * state change. - * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst, - * int dst_len, struct fib_info *fi, - * u8 tos, u8 type, u32 nlflags, u32 tb_id); - * Called to add/modify IPv4 route to switch device. - * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst, - * int dst_len, struct fib_info *fi, - * u8 tos, u8 type, u32 tb_id); - * Called to delete IPv4 route from switch device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1197,25 +1178,6 @@ struct net_device_ops { netdev_features_t (*ndo_features_check) (struct sk_buff *skb, struct net_device *dev, netdev_features_t features); -#ifdef CONFIG_NET_SWITCHDEV - int (*ndo_switch_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); - int (*ndo_switch_port_stp_update)(struct net_device *dev, - u8 state); - int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, - __be32 dst, - int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, - u32 tb_id); - int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, - __be32 dst, - int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 tb_id); -#endif }; /** -- cgit v1.2.3 From c055d5b03bb4cb69d349d787c9787c0383abd8b2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 05:08:19 +0100 Subject: netfilter: bridge: query conntrack about skb dnat ask conntrack instead of storing ipv4 address in nf_bridge_info->data. Ths avoids the need to use ->data during NF_PRE_ROUTING. Only two functions that need ->data remain. These will be addressed in followup patches. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index bb39113ea596..de123d769ffc 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -54,12 +54,6 @@ static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) return 0; } -struct bridge_skb_cb { - union { - __be32 ipv4; - } daddr; -}; - static inline void br_drop_fake_rtable(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); -- cgit v1.2.3 From e4bb9bcbfb7d67431dfd49860f62770a7f40193b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 10:36:48 +0100 Subject: netfilter: bridge: remove BRNF_STATE_BRIDGED flag Its not needed anymore since 2bf540b73ed5b ([NETFILTER]: bridge-netfilter: remove deferred hooks). Before this it was possible to have physoutdev set for locally generated packets -- this isn't the case anymore: BRNF_STATE_BRIDGED flag is set when we assign nf_bridge->physoutdev, so physoutdev != NULL means BRNF_STATE_BRIDGED is set. If physoutdev is NULL, then we are looking at locally-delivered and routed packet. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index de123d769ffc..ed0d3bf953c3 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -19,7 +19,6 @@ enum nf_br_hook_priorities { #define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 -#define BRNF_BRIDGED 0x04 #define BRNF_NF_BRIDGE_PREROUTING 0x08 #define BRNF_8021Q 0x10 #define BRNF_PPPoE 0x20 -- cgit v1.2.3 From 9439ce00f208d95703a6725e4ea986dd90e37ffd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Mar 2015 18:32:29 -0700 Subject: tcp: rename struct tcp_request_sock listener The listener field in struct tcp_request_sock is a pointer back to the listener. We now have req->rsk_listener, so TCP only needs one boolean and not a full pointer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 97dbf16f7d9d..f869ae8afbaf 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -111,7 +111,7 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; - struct sock *listener; /* needed for TFO */ + bool tfo_listener; u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ -- cgit v1.2.3 From 6aebd940840a4d3a0a8ffc5883d3892f4bd61e90 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:15 +1100 Subject: rhashtable: Remove shift from bucket_table Keeping both size and shift is silly. We only need one. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 1695378b3c5b..f16e85692959 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -51,7 +51,6 @@ struct rhash_head { * @size: Number of hash buckets * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash - * @shift: Current size (1 << shift) * @locks_mask: Mask to apply before accessing locks[] * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers @@ -63,7 +62,6 @@ struct bucket_table { unsigned int size; unsigned int rehash; u32 hash_rnd; - u32 shift; unsigned int locks_mask; spinlock_t *locks; struct list_head walkers; -- cgit v1.2.3 From c2e213cff701fce71a0aba8de82f2c2a4acf52ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:16 +1100 Subject: rhashtable: Introduce max_size/min_size This patch adds the parameters max_size and min_size which are meant to replace max_shift and min_shift. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f16e85692959..81267fef85d7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -85,6 +85,8 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking + * @max_size: Maximum size while expanding + * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key @@ -97,6 +99,8 @@ struct rhashtable_params { size_t head_offset; size_t max_shift; size_t min_shift; + unsigned int max_size; + unsigned int min_size; u32 nulls_base; size_t locks_mul; rht_hashfn_t hashfn; -- cgit v1.2.3 From e2e21c1c5808e5dfd88d3606cd6386cf85f6f5b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:21 +1100 Subject: rhashtable: Remove max_shift and min_shift Now that nobody uses max_shift and min_shift, we can safely remove them. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 81267fef85d7..99425f2be708 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -83,8 +83,6 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed - * @max_shift: Maximum number of shifts while expanding - * @min_shift: Minimum number of shifts while shrinking * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker @@ -97,8 +95,6 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; - size_t max_shift; - size_t min_shift; unsigned int max_size; unsigned int min_size; u32 nulls_base; -- cgit v1.2.3 From f0b7d43c8a28155f50adb087a563cfc97566e477 Mon Sep 17 00:00:00 2001 From: Brad Campbell Date: Tue, 17 Mar 2015 16:25:46 -0400 Subject: cc2520: Add support for CC2591 amplifier. The TI CC2521 is an RF power amplifier that is designed to interface with the CC2520. Conveniently, it directly interfaces with the CC2520 and does not require any pins to be connected to a microcontroller/processor. Adding a CC2591 increases the CC2520's range, which is useful for border router and other wall-powered applications. Using the CC2591 with the CC2520 requires configuring the CC2520 GPIOs that are connected to the CC2591 to correctly set the CC2591 into TX and RX modes. Further, TI recommends that the CC2520_TXPOWER and CC2520_AGCCTRL1 registers are set differently to maximize the CC2591's performance. These settings are covered in TI Application Note AN065. This patch adds an optional `amplified` field to the cc2520 entry in the device tree. If present, the CC2520 will be configured to operate with a CC2591. The expected pin mapping is: CC2520 GPIO0 --> CC2591 EN CC2520 GPIO5 --> CC2591 PAEN Signed-off-by: Brad Campbell Acked-by: Varka Bhadram Signed-off-by: Marcel Holtmann --- include/linux/spi/cc2520.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/cc2520.h b/include/linux/spi/cc2520.h index 85b8ee67e937..e741e8baad92 100644 --- a/include/linux/spi/cc2520.h +++ b/include/linux/spi/cc2520.h @@ -21,6 +21,7 @@ struct cc2520_platform_data { int sfd; int reset; int vreg; + bool amplified; }; #endif -- cgit v1.2.3 From 822b3b2ebfff8e9b3d006086c527738a7ca00cd0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Mar 2015 14:57:33 +0200 Subject: net: Add max rate tx queue attribute This adds a tx_maxrate attribute to the tx queue sysfs entry allowing for max-rate limiting. Along with DCB-ETS and BQL this provides another knob to tune queue performance. The limit units are Mbps. By default it is disabled. To disable the rate limitation after it has been set for a queue, it should be set to zero. Signed-off-by: John Fastabend Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dd1d069758be..76c5de4978a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -587,6 +587,7 @@ struct netdev_queue { #ifdef CONFIG_BQL struct dql dql; #endif + unsigned long tx_maxrate; } ____cacheline_aligned_in_smp; static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) @@ -1022,6 +1023,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. + * int (*ndo_set_tx_maxrate)(struct net_device *dev, + * int queue_index, u32 maxrate); + * Called when a user wants to set a max-rate limitation of specific + * TX queue. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1178,6 +1183,9 @@ struct net_device_ops { netdev_features_t (*ndo_features_check) (struct sk_buff *skb, struct net_device *dev, netdev_features_t features); + int (*ndo_set_tx_maxrate)(struct net_device *dev, + int queue_index, + u32 maxrate); }; /** -- cgit v1.2.3 From fc31e2560a2443410fe45c27116fae736541a7b5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 18 Mar 2015 14:57:34 +0200 Subject: net/mlx4_core: Add basic support for QP max-rate limiting Add the low-level device commands and definitions used for QP max-rate limiting. This is done through the following elements: - read rate-limit device caps in QUERY_DEV_CAP: number of different rates and the min/max rates in Kbs/Mbs/Gbs units - enhance the QP context struct to contain rate limit units and value - allow to do run time rate-limit setting to QPs through the update-qp firmware command - QP rate-limiting is disallowed for VFs Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 17 +++++++++++++++++ include/linux/mlx4/qp.h | 14 ++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1cc54822b931..4550c67b92e4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,6 +205,7 @@ enum { MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, + MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23 }; enum { @@ -450,6 +451,21 @@ enum mlx4_module_id { MLX4_MODULE_ID_QSFP28 = 0x11, }; +enum { /* rl */ + MLX4_QP_RATE_LIMIT_NONE = 0, + MLX4_QP_RATE_LIMIT_KBS = 1, + MLX4_QP_RATE_LIMIT_MBS = 2, + MLX4_QP_RATE_LIMIT_GBS = 3 +}; + +struct mlx4_rate_limit_caps { + u16 num_rates; /* Number of different rates */ + u8 min_unit; + u16 min_val; + u8 max_unit; + u16 max_val; +}; + static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; @@ -565,6 +581,7 @@ struct mlx4_caps { u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; u32 vf_caps; + struct mlx4_rate_limit_caps rl_caps; }; struct mlx4_buf_list { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 551f85456c11..1023ebe035b7 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -207,14 +207,16 @@ struct mlx4_qp_context { __be32 msn; __be16 rq_wqe_counter; __be16 sq_wqe_counter; - u32 reserved3[2]; + u32 reserved3; + __be16 rate_limit_params; + __be16 reserved4; __be32 param3; __be32 nummmcpeers_basemkey; u8 log_page_size; - u8 reserved4[2]; + u8 reserved5[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; - u32 reserved5[10]; + u32 reserved6[10]; }; struct mlx4_update_qp_context { @@ -229,6 +231,7 @@ struct mlx4_update_qp_context { enum { MLX4_UPD_QP_MASK_PM_STATE = 32, MLX4_UPD_QP_MASK_VSD = 33, + MLX4_UPD_QP_MASK_RATE_LIMIT = 35, }; enum { @@ -428,7 +431,8 @@ struct mlx4_wqe_inline_seg { enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, MLX4_UPDATE_QP_VSD = 1 << 1, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 2) - 1 + MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 3) - 1 }; enum mlx4_update_qp_params_flags { @@ -438,6 +442,8 @@ enum mlx4_update_qp_params_flags { struct mlx4_update_qp_params { u8 smac_index; u32 flags; + u16 rate_unit; + u16 rate_val; }; int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, -- cgit v1.2.3 From 6eada0110c8984477f5f1e57a0b7f7b2fc841e30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Mar 2015 14:05:33 -0700 Subject: netns: constify net_hash_mix() and various callers const qualifiers ease code review by making clear which objects are not written in a function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 247cfdcc4b08..87c094961bd5 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -34,7 +34,7 @@ static inline struct udphdr *inner_udp_hdr(const struct sk_buff *skb) #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) -static inline int udp_hashfn(struct net *net, unsigned num, unsigned mask) +static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) { return (num + net_hash_mix(net)) & mask; } -- cgit v1.2.3 From 54ff9ef36bdf84d469a098cbf8e2a103fbc77054 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 18 Mar 2015 14:50:43 -0300 Subject: ipv4, ipv6: kill ip_mc_{join, leave}_group and ipv6_sock_mc_{join, drop} in favor of their inner __ ones, which doesn't grab rtnl. As these functions need to operate on a locked socket, we can't be grabbing rtnl by then. It's too late and doing so causes reversed locking. So this patch: - move rtnl handling to callers instead while already fixing some reversed locking situations, like on vxlan and ipvs code. - renames __ ones to not have the __ mark: __ip_mc_{join,leave}_group -> ip_mc_{join,leave}_group __ipv6_sock_mc_{join,drop} -> ipv6_sock_mc_{join,drop} Signed-off-by: Marcelo Ricardo Leitner Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index b5a6470e686c..2c677afeea47 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -111,9 +111,7 @@ struct ip_mc_list { extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); extern int igmp_rcv(struct sk_buff *); -extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); -extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, -- cgit v1.2.3 From db24a9044ee191c397dcd1c6574f56d67d7c8df5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 17 Mar 2015 20:23:15 -0600 Subject: net: add support for phys_port_name Similar to port id allow netdevices to specify port names and export the name via sysfs. Drivers can implement the netdevice operation to assist udev in having sane default names for the devices using the rule: $ cat /etc/udev/rules.d/80-net-setup-link.rules SUBSYSTEM=="net", ACTION=="add", ATTR{phys_port_name}!="", NAME="$attr{phys_port_name}" Use of phys_name versus phys_id was suggested-by Jiri Pirko. Signed-off-by: David Ahern Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76c5de4978a8..ec8f9b5f6500 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1164,6 +1164,8 @@ struct net_device_ops { bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); + int (*ndo_get_phys_port_name)(struct net_device *dev, + char *name, size_t len); void (*ndo_add_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); @@ -2947,6 +2949,8 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -- cgit v1.2.3 From 99c4a26a159b28fa46a3e746a9b41b297e73d261 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Mar 2015 22:52:33 -0400 Subject: net: Fix high overhead of vlan sub-device teardown. When a networking device is taken down that has a non-trivial number of VLAN devices configured under it, we eat a full synchronize_net() for every such VLAN device. This is because of the call chain: NETDEV_DOWN notifier --> vlan_device_event() --> dev_change_flags() --> __dev_change_flags() --> __dev_close() --> __dev_close_many() --> dev_deactivate_many() --> synchronize_net() This is kind of rediculous because we already have infrastructure for batching doing operation X to a list of net devices so that we only incur one sync. So make use of that by exporting dev_close_many() and adjusting it's interfaace so that the caller can fully manage the batch list. Use this in vlan_device_event() and all the overhead goes away. Reported-by: Salam Noureddine Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec8f9b5f6500..76951c5fbedf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2156,6 +2156,7 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); +int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); -- cgit v1.2.3 From 25911556283e5093fca235d7ba03faae7ed5dbf2 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 18 Mar 2015 13:25:26 +0100 Subject: brcmfmac: add support for BCM43430 SDIO chipset This patch added support for the BCM43430 802.11n SDIO chipset. Reviewed-by: Hante Meuleman Reviewed-by: Daniel (Deognyoun) Kim Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 91397858dc95..83430f2ea757 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -33,6 +33,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 +#define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 -- cgit v1.2.3 From 488fb86ee91d3b1182c2e30a9f9b45da14eda46f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:56:59 +1100 Subject: rhashtable: Make rhashtable_init params argument const This patch marks the rhashtable_init params argument const as there is no reason to modify it since we will always make a copy of it in the rhashtable. This patch also fixes a bug where we don't actually round up the value of min_size unless it is less than HASH_MIN_SIZE. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 99425f2be708..c85363c45fc0 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -181,7 +181,8 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, } #endif /* CONFIG_PROVE_LOCKING */ -int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); -- cgit v1.2.3 From 02fd97c3d4a8a14e222b0021c366db7041d28743 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:57:00 +1100 Subject: rhashtable: Allow hash/comparison functions to be inlined This patch deals with the complaint that we make indirect function calls on the fast paths unnecessarily in rhashtable. We resolve it by moving the fast paths into inline functions that take struct rhashtable_param (which obviously must be the same set of parameters supplied to rhashtable_init) as an argument. The only remaining indirect call is to obj_hashfn (or key_hashfn it obj_hashfn is unset) on the rehash as well as the insert-during- rehash slow path. This patch also extends the support of vairable-length keys to include those where the key is fixed but scattered in the object. For example, in netlink we want to key off the namespace and the portid but they're not next to each other. This patch does this by directly using the object hash function as the indicator of whether the key is accessible or not. It also adds a new function obj_cmpfn to compare a key against an object. This means that the caller no longer needs to supply explicit compare functions. All this is done in a backwards compatible manner so no existing users are affected until they convert to the new interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 386 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 386 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c85363c45fc0..a7188eeb135b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -22,6 +22,7 @@ #include #include #include +#include /* * The end of the chain is marked with a special nulls marks which has @@ -42,6 +43,9 @@ #define RHT_HASH_BITS 27 #define RHT_BASE_SHIFT RHT_HASH_BITS +/* Base bits plus 1 bit for nulls marker */ +#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) + struct rhash_head { struct rhash_head __rcu *next; }; @@ -72,8 +76,20 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; +/** + * struct rhashtable_compare_arg - Key for the function rhashtable_compare + * @ht: Hash table + * @key: Key to compare against + */ +struct rhashtable_compare_arg { + struct rhashtable *ht; + const void *key; +}; + typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); +typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, + const void *obj); struct rhashtable; @@ -89,6 +105,7 @@ struct rhashtable; * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object + * @obj_cmpfn: Function to compare key with object */ struct rhashtable_params { size_t nelem_hint; @@ -101,6 +118,7 @@ struct rhashtable_params { size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; + rht_obj_cmpfn_t obj_cmpfn; }; /** @@ -165,6 +183,83 @@ static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) return ((unsigned long) ptr) >> 1; } +static inline void *rht_obj(const struct rhashtable *ht, + const struct rhash_head *he) +{ + return (char *)he - ht->p.head_offset; +} + +static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, + unsigned int hash) +{ + return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1); +} + +static inline unsigned int rht_key_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const void *key, const struct rhashtable_params params) +{ + return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)); +} + +static inline unsigned int rht_head_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const struct rhash_head *he, const struct rhashtable_params params) +{ + const char *ptr = rht_obj(ht, he); + + return likely(params.obj_hashfn) ? + rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) : + rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); +} + +/** + * rht_grow_above_75 - returns true if nelems > 0.75 * table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_75(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + /* Expand table when exceeding 75% load */ + return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) && + (!ht->p.max_size || tbl->size < ht->p.max_size); +} + +/** + * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_shrink_below_30(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + /* Shrink table beneath 30% load */ + return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) && + tbl->size > ht->p.min_size; +} + +/* The bucket lock is selected based on the hash and protects mutations + * on a group of hash buckets. + * + * A maximum of tbl->size/2 bucket locks is allocated. This ensures that + * a single lock always covers both buckets which may both contains + * entries which link to the same bucket of the old table during resizing. + * This allows to simplify the locking as locking the bucket in both + * tables during resize always guarantee protection. + * + * IMPORTANT: When holding the bucket lock of both the old and new table + * during expansions and shrinking, the old bucket lock must always be + * acquired first. + */ +static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl, + unsigned int hash) +{ + return &tbl->locks[hash & tbl->locks_mask]; +} + #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); @@ -184,6 +279,9 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params); +int rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj, + struct bucket_table *old_tbl); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); @@ -356,4 +454,292 @@ void rhashtable_destroy(struct rhashtable *ht); rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) +static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + const char *ptr = obj; + + return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); +} + +/** + * rhashtable_lookup_fast - search hash table, inlined version + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + const struct bucket_table *tbl; + struct rhash_head *he; + unsigned hash; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); +restart: + hash = rht_key_hashfn(ht, tbl, key, params); + rht_for_each_rcu(he, tbl, hash) { + if (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, he)) : + rhashtable_compare(&arg, rht_obj(ht, he))) + continue; + rcu_read_unlock(); + return rht_obj(ht, he); + } + + /* Ensure we see any new tables. */ + smp_rmb(); + + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(tbl)) + goto restart; + rcu_read_unlock(); + + return NULL; +} + +static inline int __rhashtable_insert_fast( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + int err = -EEXIST; + struct bucket_table *tbl, *new_tbl; + struct rhash_head *head; + spinlock_t *lock; + unsigned hash; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + /* Because we have already taken the bucket lock in tbl, + * if we find that future_tbl is not yet visible then + * that guarantees all other insertions of the same entry + * will also grab the bucket lock in tbl because until + * the rehash completes ht->tbl won't be changed. + */ + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(new_tbl)) { + err = rhashtable_insert_slow(ht, key, obj, new_tbl); + goto out; + } + + if (!key) + goto skip_lookup; + + rht_for_each(head, tbl, hash) { + if (unlikely(!(params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head))))) + goto out; + } + +skip_lookup: + err = 0; + + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + + RCU_INIT_POINTER(obj->next, head); + + rcu_assign_pointer(tbl->buckets[hash], obj); + + atomic_inc(&ht->nelems); + if (rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); + +out: + spin_unlock_bh(lock); + rcu_read_unlock(); + + return err; +} + +/** + * rhashtable_insert_fast - insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhashtable_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + return __rhashtable_insert_fast(ht, NULL, obj, params); +} + +/** + * rhashtable_lookup_insert_fast - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * This lookup function may only be used for fixed key hash table (key_len + * parameter set). It will BUG() if used inappropriately. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhashtable_lookup_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + const char *key = rht_obj(ht, obj); + + BUG_ON(ht->p.obj_hashfn); + + return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, + params); +} + +/** + * rhashtable_lookup_insert_key - search and insert object to hash table + * with explicit key + * @ht: hash table + * @key: key + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * Lookups may occur in parallel with hashtable mutations and resizing. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + * + * Returns zero on success. + */ +static inline int rhashtable_lookup_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + BUG_ON(!ht->p.obj_hashfn || !key); + + return __rhashtable_insert_fast(ht, key, obj, params); +} + +static inline int __rhashtable_remove_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj, const struct rhashtable_params params) +{ + struct rhash_head __rcu **pprev; + struct rhash_head *he; + spinlock_t * lock; + unsigned hash; + int err = -ENOENT; + + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { + if (he != obj) { + pprev = &he->next; + continue; + } + + rcu_assign_pointer(*pprev, obj->next); + err = 0; + break; + } + + spin_unlock_bh(lock); + + return err; +} + +/** + * rhashtable_remove_fast - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + if (err) + goto out; + + atomic_dec(&ht->nelems); + if (rht_shrink_below_30(ht, tbl)) + schedule_work(&ht->run_work); + +out: + rcu_read_unlock(); + + return err; +} + #endif /* _LINUX_RHASHTABLE_H */ -- cgit v1.2.3 From dc0ee268d85026530720d8c874716287b7ede25b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:57:06 +1100 Subject: rhashtable: Rip out obsolete out-of-line interface Now that all rhashtable users have been converted over to the inline interface, this patch removes the unused out-of-line interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a7188eeb135b..c3034de2c235 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,14 +1,13 @@ /* * Resizable, Scalable, Concurrent Hash Table * + * Copyright (c) 2015 Herbert Xu * Copyright (c) 2014 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * - * Based on the following paper by Josh Triplett, Paul E. McKenney - * and Jonathan Walpole: - * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf - * * Code partially derived from nft_hash + * Rewritten with rehash code from br_multicast plus single list + * pointer as suggested by Josh Triplett * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -282,22 +281,10 @@ int rhashtable_init(struct rhashtable *ht, int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); -bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); -void *rhashtable_lookup(struct rhashtable *ht, const void *key); -void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, - bool (*compare)(void *, void *), void *arg); - -bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); -bool rhashtable_lookup_compare_insert(struct rhashtable *ht, - struct rhash_head *obj, - bool (*compare)(void *, void *), - void *arg); - int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); -- cgit v1.2.3 From 6626af692692b52c8f9e20ad8201a3255e5ab25b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 18:18:45 -0400 Subject: rhashtable: Fix undeclared EEXIST build error on ia64 We need to include linux/errno.h in rhashtable.h since it doesn't always get included otherwise. Reported-by: kbuild test robot Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index c3034de2c235..89d102270570 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -18,6 +18,7 @@ #define _LINUX_RHASHTABLE_H #include +#include #include #include #include -- cgit v1.2.3 From d3593b5cef76db45c864de23c599b58198879e8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Mar 2015 17:15:19 -0700 Subject: Revert "selinux: add a skb_owned_by() hook" This reverts commit ca10b9e9a8ca7342ee07065289cbe74ac128c169. No longer needed after commit eb8895debe1baba41fcb62c78a16f0c63c21662a ("tcp: tcp_make_synack() should use sock_wmalloc") When under SYNFLOOD, we build lot of SYNACK and hit false sharing because of multiple modifications done on sk_listener->sk_wmem_alloc Since tcp_make_synack() uses sock_wmalloc(), there is no need to call skb_set_owner_w() again, as this adds two atomic operations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/security.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index a1b7dbd127ff..25a079a7c3b3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1716,7 +1716,6 @@ struct security_operations { int (*tun_dev_attach_queue) (void *security); int (*tun_dev_attach) (struct sock *sk, void *security); int (*tun_dev_open) (void *security); - void (*skb_owned_by) (struct sk_buff *skb, struct sock *sk); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2735,8 +2734,6 @@ int security_tun_dev_attach_queue(void *security); int security_tun_dev_attach(struct sock *sk, void *security); int security_tun_dev_open(void *security); -void security_skb_owned_by(struct sk_buff *skb, struct sock *sk); - #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, struct sock *other, @@ -2928,11 +2925,6 @@ static inline int security_tun_dev_open(void *security) { return 0; } - -static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) -{ -} - #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -- cgit v1.2.3 From 8d0451638ad3f7ccd5250c1dd90e06ad487b2703 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Mar 2015 20:55:31 +0100 Subject: netfilter: bridge: kill nf_bridge_pad The br_netfilter frag output function calls skb_cow_head() so in case it needs a larger headroom to e.g. re-add a previously stripped PPPOE or VLAN header things will still work (at cost of reallocation). We can then move nf_bridge_encap_header_len to br_netfilter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index ed0d3bf953c3..2734977199ca 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -23,18 +23,6 @@ enum nf_br_hook_priorities { #define BRNF_8021Q 0x10 #define BRNF_PPPoE 0x20 -static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) -{ - switch (skb->protocol) { - case __cpu_to_be16(ETH_P_8021Q): - return VLAN_HLEN; - case __cpu_to_be16(ETH_P_PPP_SES): - return PPPOE_SES_HLEN; - default: - return 0; - } -} - static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) @@ -44,15 +32,6 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) int br_handle_frame_finish(struct sk_buff *skb); -/* This is called by the IP fragmenting code and it ensures there is - * enough room for the encapsulating header (if there is one). */ -static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) -{ - if (skb->nf_bridge) - return nf_bridge_encap_header_len(skb); - return 0; -} - static inline void br_drop_fake_rtable(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -62,7 +41,6 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) } #else -#define nf_bridge_pad(skb) (0) #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ -- cgit v1.2.3 From 2b290bbb60847c0897c047b5214192810de529df Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 10 Mar 2015 04:48:20 +0100 Subject: can: use sock_efree instead of own destructor It is identical to the can destructor. Signed-off-by: Florian Westphal Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index cc00d15c6107..b6a52a4b457a 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -44,16 +44,11 @@ static inline void can_skb_reserve(struct sk_buff *skb) skb_reserve(skb, sizeof(struct can_skb_priv)); } -static inline void can_skb_destructor(struct sk_buff *skb) -{ - sock_put(skb->sk); -} - static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) { if (sk) { sock_hold(sk); - skb->destructor = can_skb_destructor; + skb->destructor = sock_efree; skb->sk = sk; } } -- cgit v1.2.3 From c54eb70e3bb8cd0d7b8564bedab63e834656c567 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Mon, 16 Mar 2015 09:38:13 +0100 Subject: can: add combined rx/tx LED trigger support Add -rxtx trigger, that will be activated both for tx as rx events. This trigger mimics "activity" LED for Ethernet devices. Signed-off-by: Yegor Yefremov Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 2 ++ include/linux/can/led.h | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c05ff0f9f9a5..c3a9c8fc60fa 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -61,6 +61,8 @@ struct can_priv { char tx_led_trig_name[CAN_LED_NAME_SZ]; struct led_trigger *rx_led_trig; char rx_led_trig_name[CAN_LED_NAME_SZ]; + struct led_trigger *rxtx_led_trig; + char rxtx_led_trig_name[CAN_LED_NAME_SZ]; #endif }; diff --git a/include/linux/can/led.h b/include/linux/can/led.h index e0475c5cbb92..146de4506d21 100644 --- a/include/linux/can/led.h +++ b/include/linux/can/led.h @@ -21,8 +21,10 @@ enum can_led_event { #ifdef CONFIG_CAN_LEDS -/* keep space for interface name + "-tx"/"-rx" suffix and null terminator */ -#define CAN_LED_NAME_SZ (IFNAMSIZ + 4) +/* keep space for interface name + "-tx"/"-rx"/"-rxtx" + * suffix and null terminator + */ +#define CAN_LED_NAME_SZ (IFNAMSIZ + 6) void can_led_event(struct net_device *netdev, enum can_led_event event); void devm_can_led_init(struct net_device *netdev); -- cgit v1.2.3 From 0345f93138b2224e0d7ce91fcffdb3dd23f364d7 Mon Sep 17 00:00:00 2001 From: "tadeusz.struk@intel.com" Date: Thu, 19 Mar 2015 12:31:25 -0700 Subject: net: socket: add support for async operations Add support for async operations. Signed-off-by: Tadeusz Struk Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index fab4d0ddf4ed..c9852ef7e317 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -51,6 +51,7 @@ struct msghdr { void *msg_control; /* ancillary data */ __kernel_size_t msg_controllen; /* ancillary data buffer length */ unsigned int msg_flags; /* flags on received message */ + struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; struct user_msghdr { -- cgit v1.2.3 From 42cb80a2353f42913ae78074ffa1f1b4a49e5436 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Mar 2015 10:22:19 -0700 Subject: inet: remove sk_listener parameter from syn_ack_timeout() It is not needed, and req->sk_listener points to the listener anyway. request_sock argument can be const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 439ff698000a..3dca24d3ac67 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -317,6 +317,6 @@ static inline const char *dccp_role(const struct sock *sk) return NULL; } -extern void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req); +extern void dccp_syn_ack_timeout(const struct request_sock *req); #endif /* _LINUX_DCCP_H */ -- cgit v1.2.3 From 85645bab57bfc6b0b43bb96a301c4ef83925c07d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Mar 2015 10:22:24 -0700 Subject: ipv4: dccp: handle ICMP messages on DCCP_NEW_SYN_RECV request sockets dccp_v4_err() can restrict lookups to ehash table, and not to listeners. Note this patch creates the infrastructure, but this means that ICMP messages for request sockets are ignored until complete conversion. New dccp_req_err() helper is exported so that we can use it in IPv6 in following patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3dca24d3ac67..221025423e6c 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -43,6 +43,7 @@ enum dccp_state { DCCP_CLOSING = TCP_CLOSING, DCCP_TIME_WAIT = TCP_TIME_WAIT, DCCP_CLOSED = TCP_CLOSE, + DCCP_NEW_SYN_RECV = TCP_NEW_SYN_RECV, DCCP_PARTOPEN = TCP_MAX_STATES, DCCP_PASSIVE_CLOSEREQ, /* clients receiving CloseReq */ DCCP_MAX_STATES @@ -57,6 +58,7 @@ enum { DCCPF_CLOSING = TCPF_CLOSING, DCCPF_TIME_WAIT = TCPF_TIME_WAIT, DCCPF_CLOSED = TCPF_CLOSE, + DCCPF_NEW_SYN_RECV = TCPF_NEW_SYN_RECV, DCCPF_PARTOPEN = (1 << DCCP_PARTOPEN), }; -- cgit v1.2.3 From de91b25c8011089f5dd99b9d24743db1f550ca4b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:20 +1100 Subject: rhashtable: Eliminate unnecessary branch in rht_key_hashfn When rht_key_hashfn is called from rhashtable itself and params is equal to ht->p, there is no point in checking params.key_len and falling back to ht->p.key_len. For some reason gcc couldn't figure out that params is the same as ht->p. So let's help it by only checking params.key_len when it's a constant. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 89d102270570..3851952781d7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -199,8 +199,12 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { - return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?: - ht->p.key_len, + /* params must be equal to ht->p if it isn't constant. */ + unsigned key_len = __builtin_constant_p(params.key_len) ? + (params.key_len ?: ht->p.key_len) : + params.key_len; + + return rht_bucket_index(tbl, params.hashfn(key, key_len, tbl->hash_rnd)); } -- cgit v1.2.3 From 31ccde2dacea8375c3a7d6fffbf0060ee0d40214 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:21 +1100 Subject: rhashtable: Allow hashfn to be unset Since every current rhashtable user uses jhash as their hash function, the fact that jhash is an inline function causes each user to generate a copy of its code. This function provides a solution to this problem by allowing hashfn to be unset. In which case rhashtable will automatically set it to jhash. Furthermore, if the key length is a multiple of 4, we will switch over to jhash2. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3851952781d7..bc2488b98321 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -103,7 +104,7 @@ struct rhashtable; * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) - * @hashfn: Function to hash key + * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object * @obj_cmpfn: Function to compare key with object */ @@ -125,6 +126,7 @@ struct rhashtable_params { * struct rhashtable - Hash table handle * @tbl: Bucket table * @nelems: Number of elements in table + * @key_len: Key length for hashfn * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -134,6 +136,7 @@ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; bool being_destroyed; + unsigned int key_len; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; @@ -199,13 +202,31 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { + unsigned hash; + /* params must be equal to ht->p if it isn't constant. */ - unsigned key_len = __builtin_constant_p(params.key_len) ? - (params.key_len ?: ht->p.key_len) : - params.key_len; + if (!__builtin_constant_p(params.key_len)) + hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); + else if (params.key_len) { + unsigned key_len = params.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else if (key_len & (sizeof(u32) - 1)) + hash = jhash(key, key_len, tbl->hash_rnd); + else + hash = jhash2(key, key_len / sizeof(u32), + tbl->hash_rnd); + } else { + unsigned key_len = ht->p.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else + hash = jhash(key, key_len, tbl->hash_rnd); + } - return rht_bucket_index(tbl, params.hashfn(key, key_len, - tbl->hash_rnd)); + return rht_bucket_index(tbl, hash); } static inline unsigned int rht_head_hashfn( -- cgit v1.2.3 From b824478b2145be78ac19e1cf44e2b9036c7a9608 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:26 +1100 Subject: rhashtable: Add multiple rehash support This patch adds the missing bits to allow multiple rehashes. The read-side as well as remove already handle this correctly. So it's only the rehasher and insertion that need modification to handle this. Note that this patch doesn't actually enable it so for now rehashing is still only performed by the worker thread. This patch also disables the explicit expand/shrink interface because the table is meant to expand and shrink automatically, and continuing to export these interfaces unnecessarily complicates the life of the rehasher since the rehash process is now composed of two parts. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index bc2488b98321..e8ffcdb5e239 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -308,9 +308,6 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); -int rhashtable_expand(struct rhashtable *ht); -int rhashtable_shrink(struct rhashtable *ht); - int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); @@ -541,17 +538,22 @@ static inline int __rhashtable_insert_fast( rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); - hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - - spin_lock_bh(lock); - /* Because we have already taken the bucket lock in tbl, - * if we find that future_tbl is not yet visible then - * that guarantees all other insertions of the same entry - * will also grab the bucket lock in tbl because until - * the rehash completes ht->tbl won't be changed. + /* All insertions must grab the oldest table containing + * the hashed bucket that is yet to be rehashed. */ + for (;;) { + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); + + if (tbl->rehash <= hash) + break; + + spin_unlock_bh(lock); + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + } + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { err = rhashtable_insert_slow(ht, key, obj, new_tbl); -- cgit v1.2.3 From ccd57b1bd32460d27bbb9c599e795628a3c66983 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:28 +1100 Subject: rhashtable: Add immediate rehash during insertion This patch reintroduces immediate rehash during insertion. If we find during insertion that the table is full or the chain length exceeds a set limit (currently 16 but may be disabled with insecure_elasticity) then we will force an immediate rehash. The rehash will contain an expansion if the table utilisation exceeds 75%. If this rehash fails then the insertion will fail. Otherwise the insertion will be reattempted in the new hash table. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e8ffcdb5e239..f9ecf32bce55 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -103,6 +103,7 @@ struct rhashtable; * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker + * @insecure_elasticity: Set to true to disable chain length checks * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -116,6 +117,7 @@ struct rhashtable_params { unsigned int max_size; unsigned int min_size; u32 nulls_base; + bool insecure_elasticity; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -127,6 +129,7 @@ struct rhashtable_params { * @tbl: Bucket table * @nelems: Number of elements in table * @key_len: Key length for hashfn + * @elasticity: Maximum chain length before rehash * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -137,6 +140,7 @@ struct rhashtable { atomic_t nelems; bool being_destroyed; unsigned int key_len; + unsigned int elasticity; struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; @@ -266,6 +270,17 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht, tbl->size > ht->p.min_size; } +/** + * rht_grow_above_100 - returns true if nelems > table-size + * @ht: hash table + * @tbl: current table + */ +static inline bool rht_grow_above_100(const struct rhashtable *ht, + const struct bucket_table *tbl) +{ + return atomic_read(&ht->nelems) > tbl->size; +} + /* The bucket lock is selected based on the hash and protects mutations * on a group of hash buckets. * @@ -307,6 +322,7 @@ int rhashtable_init(struct rhashtable *ht, int rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, struct bucket_table *old_tbl); +int rhashtable_insert_rehash(struct rhashtable *ht); int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); @@ -529,12 +545,14 @@ static inline int __rhashtable_insert_fast( .ht = ht, .key = key, }; - int err = -EEXIST; struct bucket_table *tbl, *new_tbl; struct rhash_head *head; spinlock_t *lock; + unsigned elasticity; unsigned hash; + int err; +restart: rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); @@ -557,20 +575,34 @@ static inline int __rhashtable_insert_fast( new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { err = rhashtable_insert_slow(ht, key, obj, new_tbl); + if (err == -EAGAIN) + goto slow_path; goto out; } - if (!key) - goto skip_lookup; + if (unlikely(rht_grow_above_100(ht, tbl))) { +slow_path: + spin_unlock_bh(lock); + rcu_read_unlock(); + err = rhashtable_insert_rehash(ht); + if (err) + return err; + + goto restart; + } + err = -EEXIST; + elasticity = ht->elasticity; rht_for_each(head, tbl, hash) { - if (unlikely(!(params.obj_cmpfn ? + if (key && + unlikely(!(params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : rhashtable_compare(&arg, rht_obj(ht, head))))) goto out; + if (!--elasticity) + goto slow_path; } -skip_lookup: err = 0; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); -- cgit v1.2.3 From 3d1bec99320d4e96897805440f8cf4f68eff226b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 23 Mar 2015 23:36:00 +0100 Subject: ipv6: introduce secret_stable to ipv6_devconf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements the procfs logic for the stable_address knob: The secret is formatted as an ipv6 address and will be stored per interface and per namespace. We track initialized flag and return EIO errors until the secret is set. We don't inherit the secret to newly created namespaces. Cc: Erik Kline Cc: Fernando Gont Cc: Lorenzo Colitti Cc: YOSHIFUJI Hideaki/吉藤英明 Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4d5169f5d7d1..82806c60aa42 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -53,6 +53,10 @@ struct ipv6_devconf { __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; + struct ipv6_stable_secret { + bool initialized; + struct in6_addr secret; + } stable_secret; void *sysctl; }; -- cgit v1.2.3 From ba7c95ea3870fe7b847466d39a049ab6f156aa2c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 09:53:17 +1100 Subject: rhashtable: Fix sleeping inside RCU critical section in walk_stop The commit 963ecbd41a1026d99ec7537c050867428c397b89 ("rhashtable: Fix use-after-free in rhashtable_walk_stop") fixed a real bug but created another one because we may end up sleeping inside an RCU critical section. This patch fixes it properly by replacing the mutex with a spin lock that specifically protects the walker lists. Reported-by: Sasha Levin Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f9ecf32bce55..d7be9cb0e91f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -133,6 +133,7 @@ struct rhashtable_params { * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping + * @lock: Spin lock to protect walker list * @being_destroyed: True if table is set up for destruction */ struct rhashtable { @@ -144,6 +145,7 @@ struct rhashtable { struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; + spinlock_t lock; }; /** -- cgit v1.2.3 From 0117ec1970c5fa9c566045e7df8db76acc8f150e Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 23 Mar 2015 18:40:02 +0100 Subject: net: remove never used forwarding_accel_ops pointer from net_device Cc: John Fastabend Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5ae69e7df867..08c4ab37189f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1342,7 +1342,6 @@ enum netdev_priv_flags { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations - * @fwd_ops: Management operations * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * @@ -1551,7 +1550,6 @@ struct net_device { #endif const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; - const struct forwarding_accel_ops *fwd_ops; #ifdef CONFIG_NET_SWITCHDEV const struct swdev_ops *swdev_ops; #endif -- cgit v1.2.3 From 27cd5452476978283decb19e429e81fc6c71e74b Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Tue, 24 Mar 2015 14:48:41 +0100 Subject: filter: introduce SKF_AD_VLAN_TPID BPF extension If vlan offloading takes place then vlan header is removed from frame and its contents, both vlan_tci and vlan_proto, is available to user space via TPACKET interface. However, only vlan_tci can be used in BPF filters. This commit introduces a new BPF extension. It makes possible to load the value of vlan_proto (vlan TPID) to register A. Support for classic BPF and eBPF is being added, analogous to skb->protocol. Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Jiri Pirko Signed-off-by: Michal Sekletar Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 9ee8c67ea249..fa11b3a367be 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -454,6 +454,7 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest) BPF_ANCILLARY(VLAN_TAG_PRESENT); BPF_ANCILLARY(PAY_OFFSET); BPF_ANCILLARY(RANDOM); + BPF_ANCILLARY(VLAN_TPID); } /* Fallthrough. */ default: -- cgit v1.2.3 From 58be8a583d8d316448bafa5926414cfb83c02dec Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:16 +0100 Subject: rhashtable: Extend RCU read lock into rhashtable_insert_rehash() rhashtable_insert_rehash() requires RCU locks to be held in order to access ht->tbl and traverse to the last table. Fixes: ccd57b1bd324 ("rhashtable: Add immediate rehash during insertion") Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d7be9cb0e91f..5976ab59b88f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -585,8 +585,8 @@ restart: if (unlikely(rht_grow_above_100(ht, tbl))) { slow_path: spin_unlock_bh(lock); - rcu_read_unlock(); err = rhashtable_insert_rehash(ht); + rcu_read_unlock(); if (err) return err; -- cgit v1.2.3 From 299e5c32a37a6bca8175db177117467bd1ce970a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:17 +0100 Subject: rhashtable: Use 'unsigned int' consistently Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5976ab59b88f..f89cda067cb9 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -208,13 +208,13 @@ static inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { - unsigned hash; + unsigned int hash; /* params must be equal to ht->p if it isn't constant. */ if (!__builtin_constant_p(params.key_len)) hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); else if (params.key_len) { - unsigned key_len = params.key_len; + unsigned int key_len = params.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, tbl->hash_rnd); @@ -224,7 +224,7 @@ static inline unsigned int rht_key_hashfn( hash = jhash2(key, key_len / sizeof(u32), tbl->hash_rnd); } else { - unsigned key_len = ht->p.key_len; + unsigned int key_len = ht->p.key_len; if (params.hashfn) hash = params.hashfn(key, key_len, tbl->hash_rnd); @@ -512,7 +512,7 @@ static inline void *rhashtable_lookup_fast( }; const struct bucket_table *tbl; struct rhash_head *he; - unsigned hash; + unsigned int hash; rcu_read_lock(); @@ -550,8 +550,8 @@ static inline int __rhashtable_insert_fast( struct bucket_table *tbl, *new_tbl; struct rhash_head *head; spinlock_t *lock; - unsigned elasticity; - unsigned hash; + unsigned int elasticity; + unsigned int hash; int err; restart: @@ -718,7 +718,7 @@ static inline int __rhashtable_remove_fast( struct rhash_head __rcu **pprev; struct rhash_head *he; spinlock_t * lock; - unsigned hash; + unsigned int hash; int err = -ENOENT; hash = rht_head_hashfn(ht, tbl, obj, params); -- cgit v1.2.3 From ac833bddb591b9c7a0609f440f196375be184044 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:18 +0100 Subject: rhashtable: Mark internal/private inline functions as such Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f89cda067cb9..0e1f975ad101 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -539,6 +539,7 @@ restart: return NULL; } +/* Internal function, please use rhashtable_insert_fast() instead */ static inline int __rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) @@ -711,6 +712,7 @@ static inline int rhashtable_lookup_insert_key( return __rhashtable_insert_fast(ht, key, obj, params); } +/* Internal function, please use rhashtable_remove_fast() instead */ static inline int __rhashtable_remove_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params) -- cgit v1.2.3 From b5e2c150ac914f28a28833b57397bec0b0a2bd5f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 20:42:19 +0000 Subject: rhashtable: Disable automatic shrinking by default Introduce a new bool automatic_shrinking to require the user to explicitly opt-in to automatic shrinking of tables. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 0e1f975ad101..ae26c494e230 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -2,7 +2,7 @@ * Resizable, Scalable, Concurrent Hash Table * * Copyright (c) 2015 Herbert Xu - * Copyright (c) 2014 Thomas Graf + * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * * Code partially derived from nft_hash @@ -104,6 +104,7 @@ struct rhashtable; * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @insecure_elasticity: Set to true to disable chain length checks + * @automatic_shrinking: Enable automatic shrinking of tables * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -118,6 +119,7 @@ struct rhashtable_params { unsigned int min_size; u32 nulls_base; bool insecure_elasticity; + bool automatic_shrinking; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -784,7 +786,8 @@ static inline int rhashtable_remove_fast( goto out; atomic_dec(&ht->nelems); - if (rht_shrink_below_30(ht, tbl)) + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) schedule_work(&ht->run_work); out: -- cgit v1.2.3 From 6b6f302ceda7a052dab545d6c69abf5f0d4a6cab Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 14:18:20 +0100 Subject: rhashtable: Add rhashtable_free_and_destroy() rhashtable_destroy() variant which stops rehashes, iterates over the table and calls a callback to release resources. Avoids need for nft_hash to embed rhashtable internals and allows to get rid of the being_destroyed flag. It also saves a 2nd mutex lock upon destruction. Also fixes an RCU lockdep splash on nft set destruction due to calling rht_for_each_entry_safe() without holding bucket locks. Open code this loop as we need know that no mutations may occur in parallel. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ae26c494e230..99f2e49a8a07 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -136,12 +136,10 @@ struct rhashtable_params { * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list - * @being_destroyed: True if table is set up for destruction */ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; - bool being_destroyed; unsigned int key_len; unsigned int elasticity; struct rhashtable_params p; @@ -334,6 +332,9 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); void *rhashtable_walk_next(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); +void rhashtable_free_and_destroy(struct rhashtable *ht, + void (*free_fn)(void *ptr, void *arg), + void *arg); void rhashtable_destroy(struct rhashtable *ht); #define rht_dereference(p, ht) \ -- cgit v1.2.3 From 49f7b33e63fec9d16e7ee62ba8f8ab4159cbdc26 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2015 13:07:45 +0000 Subject: rhashtable: provide len to obj_hashfn nftables sets will be converted to use so called setextensions, moving the key to a non-fixed position. To hash it, the obj_hashfn must be used, however it so far doesn't receive the length parameter. Pass the key length to obj_hashfn() and convert existing users. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/rhashtable.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 99f2e49a8a07..e23d242d1230 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -88,7 +88,7 @@ struct rhashtable_compare_arg { }; typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, const void *obj); @@ -242,7 +242,9 @@ static inline unsigned int rht_head_hashfn( const char *ptr = rht_obj(ht, he); return likely(params.obj_hashfn) ? - rht_bucket_index(tbl, params.obj_hashfn(ptr, tbl->hash_rnd)) : + rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?: + ht->p.key_len, + tbl->hash_rnd)) : rht_key_hashfn(ht, tbl, ptr + params.key_offset, params); } -- cgit v1.2.3 From dece45855a8b0d1dcf48eb01d0822070ded6a4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Perrochaud?= Date: Mon, 9 Mar 2015 11:12:04 +0100 Subject: NFC: nxp-nci: Add support for NXP NCI chips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for NXP NCI NFC controllers such as the NPC100 or PN7150 families. Signed-off-by: Clément Perrochaud Signed-off-by: Samuel Ortiz --- include/linux/platform_data/nxp-nci.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/platform_data/nxp-nci.h (limited to 'include/linux') diff --git a/include/linux/platform_data/nxp-nci.h b/include/linux/platform_data/nxp-nci.h new file mode 100644 index 000000000000..d6ed28679bb2 --- /dev/null +++ b/include/linux/platform_data/nxp-nci.h @@ -0,0 +1,27 @@ +/* + * Generic platform data for the NXP NCI NFC chips. + * + * Copyright (C) 2014 NXP Semiconductors All rights reserved. + * + * Authors: Clément Perrochaud + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _NXP_NCI_H_ +#define _NXP_NCI_H_ + +struct nxp_nci_nfc_platform_data { + unsigned int gpio_en; + unsigned int gpio_fw; + unsigned int irq; +}; + +#endif /* _NXP_NCI_H_ */ -- cgit v1.2.3 From 608cd71a9c7c9db76e78a792c5a4101e12fea43f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 26 Mar 2015 19:53:57 -0700 Subject: tc: bpf: generalize pedit action existing TC action 'pedit' can munge any bits of the packet. Generalize it for use in bpf programs attached as cls_bpf and act_bpf via bpf_skb_store_bytes() helper function. Signed-off-by: Alexei Starovoitov Reviewed-by: Jiri Pirko Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 280a315de8d6..d5cda067115a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -59,6 +59,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ }; -- cgit v1.2.3 From f5a7fb88e1f82542ca14ba93a1d4fa35471c60ca Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:11 +0900 Subject: vlan: Introduce helper functions to check if skb is tagged Separate the two checks for single vlan and multiple vlans in netif_skb_features(). This allows us to move the check for multiple vlans to another function later. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b11b28a30b9e..4265d440ec4d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -561,4 +561,49 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, skb->protocol = htons(ETH_P_802_2); } +/** + * skb_vlan_tagged - check if skb is vlan tagged. + * @skb: skbuff to query + * + * Returns true if the skb is tagged, regardless of whether it is hardware + * accelerated or not. + */ +static inline bool skb_vlan_tagged(const struct sk_buff *skb) +{ + if (!skb_vlan_tag_present(skb) && + likely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) + return false; + + return true; +} + +/** + * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. + * @skb: skbuff to query + * + * Returns true if the skb is tagged with multiple vlan headers, regardless + * of whether it is hardware accelerated or not. + */ +static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) +{ + __be16 protocol = skb->protocol; + + if (!skb_vlan_tag_present(skb)) { + struct vlan_ethhdr *veh; + + if (likely(protocol != htons(ETH_P_8021Q) && + protocol != htons(ETH_P_8021AD))) + return false; + + veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } + + if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) + return false; + + return true; +} + #endif /* !(_LINUX_IF_VLAN_H_) */ -- cgit v1.2.3 From 8cb65d00086bfba22bac87ff18b751432fc74003 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:12 +0900 Subject: net: Move check for multiple vlans to drivers To allow drivers to handle the features check for multiple tags, move the check to ndo_features_check(). As no drivers currently handle multiple tagged TSO, introduce dflt_features_check() and call it if the driver does not have ndo_features_check(). Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 4265d440ec4d..920e4457ce6e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -606,4 +606,26 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) return true; } +/** + * vlan_features_check - drop unsafe features for skb with multiple tags. + * @skb: skbuff to query + * @features: features to be checked + * + * Returns features without unsafe ones if the skb has multiple tags. + */ +static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, + netdev_features_t features) +{ + if (skb_vlan_tagged_multi(skb)) + features = netdev_intersect_features(features, + NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + return features; +} + #endif /* !(_LINUX_IF_VLAN_H_) */ -- cgit v1.2.3 From e38f30256b36700aa63aa709dc091bf6eb69c257 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:13 +0900 Subject: net: Introduce passthru_features_check As there are a number of (especially virtual) devices that don't need the multiple vlan check, introduce passthru_features_check() for convenience. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 08c4ab37189f..967bb4c8caf1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3657,6 +3657,9 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) -- cgit v1.2.3 From 92f1719407b90475b3be0b7b9c983dec2ff8351e Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 29 Mar 2015 23:11:51 +0200 Subject: ptp: introduce get/set time methods with explicit 64 bit seconds. Converting the PHC drivers over to the new methods is one step along the way to making them ready for 2038. Once all the drivers are up to date, then the old methods will be removed. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 0d8ff3fb84ba..7d6f8e66396f 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -64,12 +64,18 @@ struct ptp_clock_request { * @adjtime: Shifts the time of the hardware clock. * parameter delta: Desired change in nanoseconds. * - * @gettime: Reads the current time from the hardware clock. + * @gettime: Reads the current time from the hardware clock. (deprecated) * parameter ts: Holds the result. * - * @settime: Set the current time on the hardware clock. + * @settime: Set the current time on the hardware clock. (deprecated) * parameter ts: Time value to set. * + * @gettime64: Reads the current time from the hardware clock. + * parameter ts: Holds the result. + * + * @settime64: Set the current time on the hardware clock. + * parameter ts: Time value to set. + * * @enable: Request driver to enable or disable an ancillary feature. * parameter request: Desired resource to enable or disable. * parameter on: Caller passes one to enable or zero to disable. @@ -106,6 +112,8 @@ struct ptp_clock_info { int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts); int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts); + int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); + int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); int (*enable)(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on); int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, -- cgit v1.2.3 From ed7c6317bc599502e1fdc7f5f95cb9a5550360a4 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 29 Mar 2015 23:12:13 +0200 Subject: ptp: remove 32 bit get/set methods. All of the PHC drivers have been converted to the new methods. This patch converts the three remaining callers within the core code and removes the older methods for good. As a result, the core PHC code is ready for the year 2038. However, some of the PHC drivers are not quite ready yet. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 7d6f8e66396f..b8b73066d137 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -64,12 +64,6 @@ struct ptp_clock_request { * @adjtime: Shifts the time of the hardware clock. * parameter delta: Desired change in nanoseconds. * - * @gettime: Reads the current time from the hardware clock. (deprecated) - * parameter ts: Holds the result. - * - * @settime: Set the current time on the hardware clock. (deprecated) - * parameter ts: Time value to set. - * * @gettime64: Reads the current time from the hardware clock. * parameter ts: Holds the result. * @@ -110,8 +104,6 @@ struct ptp_clock_info { struct ptp_pin_desc *pin_config; int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); - int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts); - int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts); int (*enable)(struct ptp_clock_info *ptp, -- cgit v1.2.3 From 930345ea630405aa6e6f42efcb149c3f360a6b67 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Sun, 29 Mar 2015 16:59:25 +0200 Subject: netlink: implement nla_put_in_addr and nla_put_in6_addr IP addresses are often stored in netlink attributes. Add generic functions to do that. For nla_put_in_addr, it would be nicer to pass struct in_addr but this is not used universally throughout the kernel, in way too many places __be32 is used to store IPv4 address. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/linux/netfilter/ipset/ip_set.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f1606fa6132d..34b172301558 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -483,7 +483,7 @@ static inline int nla_put_ipaddr4(struct sk_buff *skb, int type, __be32 ipaddr) if (!__nested) return -EMSGSIZE; - ret = nla_put_net32(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); + ret = nla_put_in_addr(skb, IPSET_ATTR_IPADDR_IPV4, ipaddr); if (!ret) ipset_nest_end(skb, __nested); return ret; @@ -497,8 +497,7 @@ static inline int nla_put_ipaddr6(struct sk_buff *skb, int type, if (!__nested) return -EMSGSIZE; - ret = nla_put(skb, IPSET_ATTR_IPADDR_IPV6, - sizeof(struct in6_addr), ipaddrptr); + ret = nla_put_in6_addr(skb, IPSET_ATTR_IPADDR_IPV6, ipaddrptr); if (!ret) ipset_nest_end(skb, __nested); return ret; -- cgit v1.2.3 From ffa88f37ffeaac398be68f9678b0e6046a5ba7f6 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 30 Mar 2015 17:45:22 +0300 Subject: net/mlx4_en: Move statistics bitmap setting to the Ethernet driver The statistics bitmap belongs to the Ethernet driver, move it there. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 4550c67b92e4..49abbe28e230 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1300,7 +1300,6 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port); int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); -void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap); int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, -- cgit v1.2.3 From 0b131561a7d639abb0a194d2d8fae839ce3b99e9 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 30 Mar 2015 17:45:25 +0300 Subject: net/mlx4_en: Add Flow control statistics display via ethtool Flow control per priority and Global pause counters are now visible via ethtool. The counters shows statistics regarding pauses in the device. Signed-off-by: Matan Barak Signed-off-by: Shani Michaeli Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 49abbe28e230..ab7ebec943b8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,7 +205,8 @@ enum { MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, - MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23 + MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23, + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24 }; enum { -- cgit v1.2.3 From a54acb3a6f853e8394c4cb7b6a4d93c88f13eefd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:00 +0200 Subject: dev: introduce dev_get_iflink() The goal of this patch is to prepare the removal of the iflink field. It introduces a new ndo function, which will be implemented by virtual interfaces. There is no functional change into this patch. All readers of iflink field now call dev_get_iflink(). Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 967bb4c8caf1..788eb7a622ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1030,6 +1030,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation of specific * TX queue. + * int (*ndo_get_iflink)(const struct net_device *dev); + * Called to get the iflink value of this device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1191,6 +1193,7 @@ struct net_device_ops { int (*ndo_set_tx_maxrate)(struct net_device *dev, int queue_index, u32 maxrate); + int (*ndo_get_iflink)(const struct net_device *dev); }; /** @@ -2149,6 +2152,7 @@ void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); +int dev_get_iflink(const struct net_device *dev); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); -- cgit v1.2.3 From 7a66bbc96ce9ad8261fa5f7f6ae65370eb6866ee Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:09 +0200 Subject: net: remove iflink field from struct net_device Now that all users of iflink have the ndo_get_iflink handler available, it's possible to remove this field. By default, dev_get_iflink() returns the ifindex of the interface. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 788eb7a622ad..846a1f5bc9db 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1538,7 +1538,7 @@ struct net_device { netdev_features_t mpls_features; int ifindex; - int iflink; + int group; struct net_device_stats stats; @@ -1741,7 +1741,6 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; - int group; struct pm_qos_request pm_qos_req; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 802f42a8d99254b8602bf65cc50e8333bf479f13 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:06 +0300 Subject: net/mlx4: Add RSS support for fragmented IP datagrams Enable RSS support for fragmented IP packets, when device supports it. Until now, fragmented IP packets were directed only to the default_qpn. Since IP fragments (datagram) have no upper protocols (L3 IP packets), hash is performed on 3-tuple - dst MAC, source IP and dest IP. The HW makes sure that this holds for the 1st fragment too, so all fragments go to the same QP. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ab7ebec943b8..41eaafdfb1cd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -174,6 +174,7 @@ enum { MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, + MLX4_DEV_CAP_FLAG_RSS_IP_FRAG = 1LL << 52, MLX4_DEV_CAP_FLAG_SET_ETH_SCHED = 1LL << 53, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, -- cgit v1.2.3 From fccea6436a5bd16aed6f722efce13ec2c90427d7 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:08 +0300 Subject: net/mlx4: Make mlx4_is_eth visible inline funcion Currently implemented as static function in resource_tracker.c -- this change will allow other files in mlx4_core to use it as well. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 41eaafdfb1cd..0f7f13d6a03e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1002,6 +1002,11 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev) return dev->flags & MLX4_FLAG_SLAVE; } +static inline int mlx4_is_eth(struct mlx4_dev *dev, int port) +{ + return dev->caps.port_type[port] == MLX4_PORT_TYPE_IB ? 0 : 1; +} + int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf, gfp_t gfp); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); -- cgit v1.2.3 From 12a889c057504fbf307dd237aedb87263ef2848a Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:10 +0300 Subject: net/mlx4: New file for QoS related firmware commands Create two new files fw_qos.h and fw_qos.c in mlx4_core module. It gathers all relevant QoS firmware related commands etc, thus improving encapsulation of the mlx4_core module. For now it contains the QoS existing commands: mlx4_SET_PORT_SCHEDULER and mlx4_SET_PORT_PRIO2TC. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 0f7f13d6a03e..b676d0c0111a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -49,8 +49,6 @@ #define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 -#define MLX4_NUM_UP 8 -#define MLX4_NUM_TC 8 #define MLX4_MAX_100M_UNITS_VAL 255 /* * work around: can't set values * greater then this value when @@ -1311,9 +1309,6 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); -int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); -int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, - u8 *pg, u16 *ratelimit); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); -- cgit v1.2.3 From 7e95bb99a8f969d4534867452793e44cc4731ca9 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:11 +0300 Subject: net/mlx4: Add mlx4_ALLOCATE_VPP implementation Implements device ALLOCATE_VPP command, to be used for granular QoS configuration of VFs by the PF device. Defines and queries the amount of VPPs assigned to each port, and the amount of VPPs assigned to each priority of each port. Once the total VPPs are split between the priorities of a port, they may be assigned with a share of the BW or a rate limit. Split into two functions (get/set) whoch are supplied with mlx4_alloc_vpp_context and physical port number. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 7299e9548906..e0f88a098fb8 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -68,6 +68,7 @@ enum { MLX4_CMD_UNMAP_ICM_AUX = 0xffb, MLX4_CMD_SET_ICM_SIZE = 0xffd, MLX4_CMD_ACCESS_REG = 0x3b, + MLX4_CMD_ALLOCATE_VPP = 0x80, /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, -- cgit v1.2.3 From 1c29146d3821be984030b49cf9509a269edc3b8b Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:12 +0300 Subject: net/mlx4: Add mlx4_SET_VPORT_QOS implementation Add the SET_VPORT_QOS device command, which is ntended for virtual granular QoS configuration per VF in SRIOV mode. The SET_VPORT_QOS command sets and queries QoS parameters of a VPort. Each priority allowed for a VPort is assigned with a share of the BW, and a BW limitation. QoS parameters can be modified at any time, but must be initialized before any QP is associated with the VPort. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index e0f88a098fb8..88326ed0033a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -69,6 +69,7 @@ enum { MLX4_CMD_SET_ICM_SIZE = 0xffd, MLX4_CMD_ACCESS_REG = 0x3b, MLX4_CMD_ALLOCATE_VPP = 0x80, + MLX4_CMD_SET_VPORT_QOS = 0x81, /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, -- cgit v1.2.3 From d019fcb2244519d453694bfce868f3e717bfcebb Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:13 +0300 Subject: net/mlx4: Query device for QoS per VF support Checks in QUERY_DEV_CAP if the granular QoS per VF feature is supported by the device. Disabled for guests. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b676d0c0111a..c37208f7869f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -205,7 +205,8 @@ enum { MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21, MLX4_DEV_CAP_FLAG2_QCN = 1LL << 22, MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23, - MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24 + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24, + MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, }; enum { -- cgit v1.2.3 From 08068cd5683f11e4505aa9c8cc6ed5942f8ad299 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:15 +0300 Subject: net/mlx4: Added qos_vport QP configuration in VST mode Granular QoS per VF feature introduce a new QP field, qos_vport. PF administrator can connect VF QPs to a certain QoS Vport, to inherit its proporties. Connecting QPs to the default QoS Vport (defined as 0) is always allowed, even when there are no allocated VPPs. At this point, only the default vport is connected to QPs. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/qp.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 1023ebe035b7..6fed539e5456 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -209,7 +209,8 @@ struct mlx4_qp_context { __be16 sq_wqe_counter; u32 reserved3; __be16 rate_limit_params; - __be16 reserved4; + u8 reserved4; + u8 qos_vport; __be32 param3; __be32 nummmcpeers_basemkey; u8 log_page_size; @@ -231,6 +232,7 @@ struct mlx4_update_qp_context { enum { MLX4_UPD_QP_MASK_PM_STATE = 32, MLX4_UPD_QP_MASK_VSD = 33, + MLX4_UPD_QP_MASK_QOS_VPP = 34, MLX4_UPD_QP_MASK_RATE_LIMIT = 35, }; @@ -432,7 +434,8 @@ enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 3) - 1 + MLX4_UPDATE_QP_QOS_VPORT = 1 << 3, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 4) - 1 }; enum mlx4_update_qp_params_flags { @@ -441,6 +444,7 @@ enum mlx4_update_qp_params_flags { struct mlx4_update_qp_params { u8 smac_index; + u8 qos_vport; u32 flags; u16 rate_unit; u16 rate_val; -- cgit v1.2.3 From cda373f4849d5dd6fedceb4aeba35682a0e1a833 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:16 +0300 Subject: net/mlx4_en: Enable TX rate limit per VF Support granular QoS per VF, by implementing the ndo_set_vf_rate. Enforce a rate limit per VF when called, and enabled only for VFs in VST mode with user priority supported by the device. We don't enforce VFs to be in VST mode at the moment of configuration, but rather save the given rate limit and enforce it when the VF is moved to VST with user priority which is supported (currently 0). VST<->VGT or VST qos value state changes are disallowed when a rate limit is configured. Minimum BW share is not supported yet. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 88326ed0033a..a788839f54bb 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -296,6 +296,8 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); +int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, + int max_tx_rate); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf); int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); -- cgit v1.2.3 From 3742cc65512cd4897a63dce94104f9a6e74997a0 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:17 +0300 Subject: net/mlx4: Warn users of depracated QoS Firmware A new capability bit was introduced in the past to to differ devices using the QoS ETS feature. The old was deprecated since then. If driver sees device which set only the old capabilty, it will print warning to user suggesting to upgrade the FW. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c37208f7869f..15f46767342e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -207,6 +207,7 @@ enum { MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT = 1LL << 23, MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24, MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, + MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, }; enum { -- cgit v1.2.3 From a130b59057320192b4b00ed0ba4bc8a38f66f289 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:19 +0300 Subject: net/mlx4: Add SET_PORT opcode modifiers enumeration The calls to SET_PORT used hard-code numbers, when supplying command's opcode modifiers, fix that to use well defined constants. Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index a788839f54bb..62c4d4def474 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -188,7 +188,13 @@ enum { }; enum { - /* set port opcode modifiers */ + /* Set port opcode modifiers */ + MLX4_SET_PORT_IB_OPCODE = 0x0, + MLX4_SET_PORT_ETH_OPCODE = 0x1, +}; + +enum { + /* Set port Ethernet input modifiers */ MLX4_SET_PORT_GENERAL = 0x0, MLX4_SET_PORT_RQP_CALC = 0x1, MLX4_SET_PORT_MAC_TABLE = 0x2, -- cgit v1.2.3 From 51af33cfed248dc8f36fa82df06b85e10038a01e Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Thu, 2 Apr 2015 16:31:20 +0300 Subject: net/mlx4_en: Add interface identify support Add support for the interface ethtool identify feature. Make the physical port LED to blink with green and yellow colors. The device handles the LED blink by itself (synchrous use of set_phys_id), by returning 0 to ETHTOOL_ID_ACTIVE command. Signed-off-by: Eyal Grossman Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/device.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 62c4d4def474..f62e7cf227c6 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -191,6 +191,7 @@ enum { /* Set port opcode modifiers */ MLX4_SET_PORT_IB_OPCODE = 0x0, MLX4_SET_PORT_ETH_OPCODE = 0x1, + MLX4_SET_PORT_BEACON_OPCODE = 0x4, }; enum { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 15f46767342e..b761be7c88c8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,7 @@ enum { MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 24, MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, + MLX4_DEV_CAP_FLAG2_PORT_BEACON = 1LL << 27, }; enum { @@ -1311,6 +1312,7 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); +int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); -- cgit v1.2.3 From 78500b8c03236a18d454c9cc8a24cccca506b200 Mon Sep 17 00:00:00 2001 From: Muhammad Mahajna Date: Thu, 2 Apr 2015 16:31:22 +0300 Subject: net/mlx4_en: Add RX-ALL support Enabled when the device supports KEEP FCS and IGNORE FCS. When the flag is set, pass all received frames up the stack, even ones with invalid FCS, controlled by ethtool. Signed-off-by: Muhammad Mahajna Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b761be7c88c8..f9ce34bec45b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -209,6 +209,7 @@ enum { MLX4_DEV_CAP_FLAG2_QOS_VPP = 1LL << 25, MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, MLX4_DEV_CAP_FLAG2_PORT_BEACON = 1LL << 27, + MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28, }; enum { @@ -1313,6 +1314,8 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time); +int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, + u8 ignore_fcs_value); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); -- cgit v1.2.3 From 64599cca51de08cef94bc13a0f98351e5bb01f41 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 2 Apr 2015 17:07:25 +0300 Subject: net/mlx5_core: Use coherent memory for command interface page Use coherent memory for the commands descriptor page. Take measures to make sure the page is aligned to MLX5_ADAPTER_PAGE_SIZE as required by the hardware. Reported-by: Yevgeny Kliteynik Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 166d9315fe4b..8d8ca6d9b03b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -232,6 +232,9 @@ struct mlx5_cmd_stats { }; struct mlx5_cmd { + void *cmd_alloc_buf; + dma_addr_t alloc_dma; + int alloc_size; void *cmd_buf; dma_addr_t dma; u16 cmdif_rev; -- cgit v1.2.3 From 302bdf68fc56a6330bc6b10ce435b4d466417537 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:29 +0300 Subject: net/mlx5_core: Fix Mellanox copyright note Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/cmd.h | 2 +- include/linux/mlx5/cq.h | 2 +- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/doorbell.h | 2 +- include/linux/mlx5/driver.h | 2 +- include/linux/mlx5/mlx5_ifc.h | 2 +- include/linux/mlx5/qp.h | 2 +- include/linux/mlx5/srq.h | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h index 2826a4b6071e..68cd08f02c2f 100644 --- a/include/linux/mlx5/cmd.h +++ b/include/linux/mlx5/cmd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index f6b17ac601bd..72ee0d732a26 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4e5bd813bb9a..abf65c790421 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h index 163a818411e7..afc78a3f4462 100644 --- a/include/linux/mlx5/doorbell.h +++ b/include/linux/mlx5/doorbell.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8d8ca6d9b03b..8fedd39a9a60 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5f48b8f592c5..cb3ad17edd1f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 61f7a342d1bf..310b5f7fd6ae 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index e1a363a33663..f43ed054a3e0 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU -- cgit v1.2.3 From 233d05d28ad942929b6b4fbc48aa8dd083c16484 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:32 +0300 Subject: net/mlx5_core: Move completion eqs from mlx5_ib to mlx5_core Preparation for ethernet driver. These functions will be used in drivers other than mlx5_ib. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8fedd39a9a60..f250f6580dad 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -410,7 +410,7 @@ struct mlx5_core_srq { struct mlx5_eq_table { void __iomem *update_ci; void __iomem *update_arm_ci; - struct list_head *comp_eq_head; + struct list_head comp_eqs_list; struct mlx5_eq pages_eq; struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; @@ -725,6 +725,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); int mlx5_stop_eqs(struct mlx5_core_dev *dev); +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); -- cgit v1.2.3 From ce0f75093282c5dca1e79ae3e3e893deaea86166 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:33 +0300 Subject: net/mlx5_core: Modify arm CQ in preparation for upcoming Ethernet driver Pass consumer index as a parameter to arm CQ Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/cq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 72ee0d732a26..2695ced222df 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -137,14 +137,15 @@ enum { static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, void __iomem *uar_page, - spinlock_t *doorbell_lock) + spinlock_t *doorbell_lock, + u32 cons_index) { __be32 doorbell[2]; u32 sn; u32 ci; sn = cq->arm_sn & 3; - ci = cq->cons_index & 0xffffff; + ci = cons_index & 0xffffff; *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci); -- cgit v1.2.3 From 64613d9499c4887485d4350387919ea507330d90 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 2 Apr 2015 17:07:34 +0300 Subject: net/mlx5_core: Extend struct mlx5_interface to support multiple protocols Preparation for ethernet driver. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f250f6580dad..9a90e7523dc2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -781,14 +781,22 @@ enum { MAX_MR_CACHE_ENTRIES = 16, }; +enum { + MLX5_INTERFACE_PROTOCOL_IB = 0, + MLX5_INTERFACE_PROTOCOL_ETH = 1, +}; + struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); + void * (*get_dev)(void *context); + int protocol; struct list_head list; }; +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); -- cgit v1.2.3 From e79d8429aac95a5cbe4c235795c7cd554c91f924 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 3 Apr 2015 22:17:17 +1030 Subject: netdevice: document NETDEV_TX_BUSY deprecation. This paraphrases DaveM (and steals some of his words) explaining why a device shouldn't return NETDEV_TX_BUSY, even though it looks so inviting to driver authors. See http://www.spinics.net/lists/netdev/msg322350.html Inspired-by: David Miller Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 846a1f5bc9db..a710d22b174f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -795,7 +795,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, * struct net_device *dev); * Called when a packet needs to be transmitted. - * Must return NETDEV_TX_OK , NETDEV_TX_BUSY. + * Returns NETDEV_TX_OK. Can return NETDEV_TX_BUSY, but you should stop + * the queue before that can happen; it's for obsolete devices and weird + * corner cases, but the stack really does a non-trivial amount + * of useless work if you return NETDEV_TX_BUSY. * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * -- cgit v1.2.3 From 2e7056c433216f406b90a003aa0ba42e19d3bdcf Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 31 Mar 2015 14:19:10 -0700 Subject: jhash: Update jhash_[321]words functions to use correct initval Looking over the implementation for jhash2 and comparing it to jhash_3words I realized that the two hashes were in fact very different. Doing a bit of digging led me to "The new jhash implementation" in which lookup2 was supposed to have been replaced with lookup3. In reviewing the patch I noticed that jhash2 had originally initialized a and b to JHASH_GOLDENRATIO and c to initval, but after the patch a, b, and c were initialized to initval + (length << 2) + JHASH_INITVAL. However the changes in jhash_3words simply replaced the initialization of a and b with JHASH_INITVAL. This change corrects what I believe was an oversight so that a, b, and c in jhash_3words all have the same value added consisting of initval + (length << 2) + JHASH_INITVAL so that jhash2 and jhash_3words will now produce the same hash result given the same inputs. Fixes: 60d509c823cca ("The new jhash implementation") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/jhash.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 47cb09edec1a..348c6f47e4cc 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -145,11 +145,11 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) } -/* jhash_3words - hash exactly 3, 2 or 1 word(s) */ -static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */ +static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { - a += JHASH_INITVAL; - b += JHASH_INITVAL; + a += initval; + b += initval; c += initval; __jhash_final(a, b, c); @@ -157,14 +157,19 @@ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) return c; } +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2)); +} + static inline u32 jhash_2words(u32 a, u32 b, u32 initval) { - return jhash_3words(a, b, 0, initval); + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); } static inline u32 jhash_1word(u32 a, u32 initval) { - return jhash_3words(a, 0, 0, initval); + return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2)); } #endif /* _LINUX_JHASH_H */ -- cgit v1.2.3 From a3bebdce4135a44d09e96ba66c40797c8f9fa902 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Wed, 1 Apr 2015 20:30:31 +0300 Subject: add fixed_phy_update_state() - update state of fixed_phy Currently fixed_phy uses a callback to periodically poll the link state. This patch adds the fixed_phy_update_state() API. It solves the following problems: - On link state interrupt, MAC driver can't update status. Instead it needs to provide the callback to periodically query the HW about the link state. It is more efficient to update status after interrupt. - The callback needs to be unregistered before phy_disconnect(), or otherwise it will be called with net_dev==NULL. phy_disconnect() does not have enough info to unregister the callback automatically. - The callback needs to be registered before of_phy_connect() to avoid running with outdated state, but of_phy_connect() returns the phy_device pointer, which is needed to register the callback. Registering it before of_phy_connect() will therefore require a hack to get the pointer earlier. Overall, this addition makes the subsequent patch that implements SGMII link status for mvneta, much cleaner. CC: Florian Fainelli CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Stas Sergeev Signed-off-by: David S. Miller --- include/linux/phy_fixed.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 7e75bfe37cc7..fe5732d53eda 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -21,6 +21,9 @@ extern void fixed_phy_del(int phy_addr); extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); +extern int fixed_phy_update_state(struct phy_device *phydev, + const struct fixed_phy_status *status, + const struct fixed_phy_status *changed); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status) @@ -43,6 +46,12 @@ static inline int fixed_phy_set_link_update(struct phy_device *phydev, { return -ENODEV; } +static inline int fixed_phy_update_state(struct phy_device *phydev, + const struct fixed_phy_status *status, + const struct fixed_phy_status *changed) +{ + return -ENODEV; +} #endif /* CONFIG_FIXED_PHY */ #endif /* __PHY_FIXED_H */ -- cgit v1.2.3 From cfdfab314647b1755afedc33ab66f3f247e161ae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 16:23:58 -0400 Subject: netfilter: Create and use nf_hook_state. Instead of passing a large number of arguments down into the nf_hook() entry points, create a structure which carries this state down through the hook processing layers. This makes is so that if we want to change the types or signatures of any of these pieces of state, there are less places that need to be changed. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2517ece98820..aee7ef1e23ed 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -44,6 +44,16 @@ int netfilter_init(void); struct sk_buff; struct nf_hook_ops; + +struct nf_hook_state { + unsigned int hook; + int thresh; + u_int8_t pf; + struct net_device *in; + struct net_device *out; + int (*okfn)(struct sk_buff *); +}; + typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, @@ -118,9 +128,7 @@ static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) } #endif -int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh); +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** * nf_hook_thresh - call a netfilter hook @@ -135,8 +143,18 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh) { - if (nf_hooks_active(pf, hook)) - return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); + if (nf_hooks_active(pf, hook)) { + struct nf_hook_state state = { + .hook = hook, + .thresh = thresh, + .pf = pf, + .in = indev, + .out = outdev, + .okfn = okfn + }; + + return nf_hook_slow(skb, &state); + } return 1; } -- cgit v1.2.3 From 238e54c9cb9385a1ba99e92801f3615a2fb398b6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 20:32:56 -0400 Subject: netfilter: Make nf_hookfn use nf_hook_state. Pass the nf_hook_state all the way down into the hook functions themselves. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index aee7ef1e23ed..c480c43ad8f7 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -56,9 +56,7 @@ struct nf_hook_state { typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)); + const struct nf_hook_state *state); struct nf_hook_ops { struct list_head list; -- cgit v1.2.3 From 1c491ba2592f621f21a693d43fab06302527fc0f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 20:56:08 -0400 Subject: netfilter: Pass nf_hook_state through ipt_do_table(). Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_tables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 901e84db847d..4073510da485 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -65,8 +65,7 @@ struct ipt_error { extern void *ipt_alloc_initial_table(const struct xt_table *); extern unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table); #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 8f8a37152df49d541c43f010543f2b0176fcfb8e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 21:09:51 -0400 Subject: netfilter: Pass nf_hook_state through ip6t_do_table(). Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 610208b18c05..b40d2b635778 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -31,8 +31,7 @@ extern struct xt_table *ip6t_register_table(struct net *net, extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table); /* Check for an extension */ -- cgit v1.2.3 From b85c3dc9bd5347ad9540ec8d103b7c049c48b7cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 3 Apr 2015 21:18:46 -0400 Subject: netfilter: Pass nf_hook_state through arpt_do_table(). Signed-off-by: David S. Miller --- include/linux/netfilter_arp/arp_tables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index cfb7191e6efa..c22a7fb8d0df 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -54,8 +54,7 @@ extern struct xt_table *arpt_register_table(struct net *net, extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table); #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 107a9f4dc9211c1f91703d1739d7fd22ac58b332 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:18:54 -0400 Subject: netfilter: Add nf_hook_state initializer function. This way we can consolidate where we setup new nf_hook_state objects, to make sure the entire thing is initialized. The only other place an nf_hook_object is instantiated is nf_queue, wherein a structure copy is used. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c480c43ad8f7..b8c88f3c85ff 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,6 +54,21 @@ struct nf_hook_state { int (*okfn)(struct sk_buff *); }; +static inline void nf_hook_state_init(struct nf_hook_state *p, + unsigned int hook, + int thresh, u_int8_t pf, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + p->hook = hook; + p->thresh = thresh; + p->pf = pf; + p->in = indev; + p->out = outdev; + p->okfn = okfn; +} + typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state); @@ -142,15 +157,10 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct sk_buff *), int thresh) { if (nf_hooks_active(pf, hook)) { - struct nf_hook_state state = { - .hook = hook, - .thresh = thresh, - .pf = pf, - .in = indev, - .out = outdev, - .okfn = okfn - }; + struct nf_hook_state state; + nf_hook_state_init(&state, hook, thresh, pf, + indev, outdev, okfn); return nf_hook_slow(skb, &state); } return 1; -- cgit v1.2.3 From 1c984f8a5df085bcf35364a8a870bd4db4da4ed3 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:00 -0400 Subject: netfilter: Add socket pointer to nf_hook_state. It is currently always set to NULL, but nf_queue is adjusted to be prepared for it being set to a real socket by taking and releasing a reference to that socket when necessary. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b8c88f3c85ff..f8f58fab2402 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -45,12 +45,15 @@ struct sk_buff; struct nf_hook_ops; +struct sock; + struct nf_hook_state { unsigned int hook; int thresh; u_int8_t pf; struct net_device *in; struct net_device *out; + struct sock *sk; int (*okfn)(struct sk_buff *); }; @@ -59,6 +62,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, int thresh, u_int8_t pf, struct net_device *indev, struct net_device *outdev, + struct sock *sk, int (*okfn)(struct sk_buff *)) { p->hook = hook; @@ -66,6 +70,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->pf = pf; p->in = indev; p->out = outdev; + p->sk = sk; p->okfn = okfn; } @@ -160,7 +165,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct nf_hook_state state; nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, okfn); + indev, outdev, NULL, okfn); return nf_hook_slow(skb, &state); } return 1; -- cgit v1.2.3 From 7026b1ddb6b8d4e6ee33dc2bd06c0ca8746fa7ab Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:04 -0400 Subject: netfilter: Pass socket pointer down through okfn(). On the output paths in particular, we have to sometimes deal with two socket contexts. First, and usually skb->sk, is the local socket that generated the frame. And second, is potentially the socket used to control a tunneling socket, such as one the encapsulates using UDP. We do not want to disassociate skb->sk when encapsulating in order to fix this, because that would break socket memory accounting. The most extreme case where this can cause huge problems is an AF_PACKET socket transmitting over a vxlan device. We hit code paths doing checks that assume they are dealing with an ipv4 socket, but are actually operating upon the AF_PACKET one. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++++-- include/linux/netfilter.h | 62 ++++++++++++++++++++++------------------ include/linux/netfilter_bridge.h | 2 +- 3 files changed, 46 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41bf58a2b936..45823db2efb0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2165,8 +2165,12 @@ int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); -int dev_loopback_xmit(struct sk_buff *newskb); -int dev_queue_xmit(struct sk_buff *skb); +int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb); +int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb); +static inline int dev_queue_xmit(struct sk_buff *skb) +{ + return dev_queue_xmit_sk(skb->sk, skb); +} int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); @@ -2927,7 +2931,11 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); -int netif_receive_skb(struct sk_buff *skb); +int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb); +static inline int netif_receive_skb(struct sk_buff *skb) +{ + return netif_receive_skb_sk(skb->sk, skb); +} gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f8f58fab2402..63560d0a8dfe 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,7 +54,7 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; - int (*okfn)(struct sk_buff *); + int (*okfn)(struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, @@ -63,7 +63,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, struct net_device *indev, struct net_device *outdev, struct sock *sk, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sock *, struct sk_buff *)) { p->hook = hook; p->thresh = thresh; @@ -156,26 +156,29 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); * value indicates the packet has been consumed by the hook. */ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sock *, struct sk_buff *), + int thresh) { if (nf_hooks_active(pf, hook)) { struct nf_hook_state state; nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, NULL, okfn); + indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } return 1; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sock *, struct sk_buff *)) { - return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN); + return nf_hook_thresh(pf, hook, sk, skb, indev, outdev, okfn, INT_MIN); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -196,35 +199,36 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, */ static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *), int thresh) +NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *in, + struct net_device *out, + int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - int ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh); + int ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, thresh); if (ret == 1) - ret = okfn(skb); + ret = okfn(sk, skb); return ret; } static inline int -NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *), bool cond) +NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *in, struct net_device *out, + int (*okfn)(struct sock *, struct sk_buff *), bool cond) { int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN)) == 1)) - ret = okfn(skb); + ((ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ret = okfn(sk, skb); return ret; } static inline int -NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb, +NF_HOOK(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN); + return NF_HOOK_THRESH(pf, hook, sk, skb, in, out, okfn, INT_MIN); } /* Call setsockopt() */ @@ -324,19 +328,21 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) -#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) +#define NF_HOOK(pf, hook, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) +#define NF_HOOK_COND(pf, hook, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sock *sk, struct sk_buff *), int thresh) { - return okfn(skb); + return okfn(sk, skb); } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sock *, struct sk_buff *)) { return 1; } diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 2734977199ca..5fc0a0fe244b 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -30,7 +30,7 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) return 0; } -int br_handle_frame_finish(struct sk_buff *skb); +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) { -- cgit v1.2.3 From 388069d30240e9524ad69ce6cc692fff2a5a7829 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 7 Apr 2015 12:10:16 +0200 Subject: netdevice.h: remove iflink description Also move 'group' description to match the order of the net_device structure. Fixes: 7a66bbc96ce9 ("net: remove iflink field from struct net_device") Reported-by: Fengguang Wu Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45823db2efb0..bf6d9df34d7b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1328,7 +1328,7 @@ enum netdev_priv_flags { * @mpls_features: Mask of features inheritable by MPLS * * @ifindex: interface index - * @iflink: unique device identifier + * @group: The group, that the device belongs to * * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead @@ -1488,7 +1488,6 @@ enum netdev_priv_flags { * * @qdisc_tx_busylock: XXX: need comments on this one * - * @group: The group, that the device belongs to * @pm_qos_req: Power Management QoS object * * FIXME: cleanup struct net_device such that network protocol info -- cgit v1.2.3 From 7f9b838b71eb78a27de27a12ca5de8542fac3115 Mon Sep 17 00:00:00 2001 From: Daniel Lee Date: Mon, 6 Apr 2015 14:37:26 -0700 Subject: tcp: RFC7413 option support for Fast Open server Fast Open has been using the experimental option with a magic number (RFC6994) to request and grant Fast Open cookies. This patch enables the server to support the official IANA option 34 in RFC7413 in addition. The change has passed all existing Fast Open tests with both old and new options at Google. Signed-off-by: Daniel Lee Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f869ae8afbaf..a48d00318683 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -58,6 +58,7 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) struct tcp_fastopen_cookie { s8 len; u8 val[TCP_FASTOPEN_COOKIE_MAX]; + bool exp; /* In RFC6994 experimental option format */ }; /* This defines a selective acknowledgement block. */ -- cgit v1.2.3 From 2646c831c00c5d22aa72b79d24069c1b412cda7c Mon Sep 17 00:00:00 2001 From: Daniel Lee Date: Mon, 6 Apr 2015 14:37:27 -0700 Subject: tcp: RFC7413 option support for Fast Open client Fast Open has been using an experimental option with a magic number (RFC6994). This patch makes the client by default use the RFC7413 option (34) to get and send Fast Open cookies. This patch makes the client solicit cookies from a given server first with the RFC7413 option. If that fails to elicit a cookie, then it tries the RFC6994 experimental option. If that also fails, it uses the RFC7413 option on all subsequent connect attempts. If the server returns a Fast Open cookie then the client caches the form of the option that successfully elicited a cookie, and uses that form on later connects when it presents that cookie. The idea is to gradually obsolete the use of experimental options as the servers and clients upgrade, while keeping the interoperability meanwhile. Signed-off-by: Daniel Lee Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a48d00318683..0caa3a2d4106 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -189,6 +189,7 @@ struct tcp_sock { u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ + syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ -- cgit v1.2.3 From e70deecbf8e1562cac0b19f23848919e2f5d65aa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:40 +0200 Subject: netfilter: bridge: don't use nf_bridge_info data to store mac header br_netfilter maintains an extra state, nf_bridge_info, which is attached to skb via skb->nf_bridge pointer. Amongst other things we use skb->nf_bridge->data to store the original mac header for every processed skb. This is required for ip refragmentation when using conntrack on top of bridge, because ip_fragment doesn't copy it from original skb. However there is no need anymore to do this unconditionally. Move this to the one place where its needed -- when br_netfilter calls ip_fragment(). Also switch to percpu storage for this so we can handle fragmenting without accessing nf_bridge meta data. Only user left is neigh resolution when DNAT is detected, to hold the original source mac address (neigh resolution builds new mac header using bridge mac), so rename ->data and reduce its size to whats needed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 36f3f43c0117..f66a089afc41 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -169,7 +169,7 @@ struct nf_bridge_info { unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; - unsigned long data[32 / sizeof(unsigned long)]; + char neigh_header[8]; }; #endif -- cgit v1.2.3 From c737b7c4510026c200e14de51eb0006adea0fb2f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:41 +0200 Subject: netfilter: bridge: add helpers for fetching physin/outdev right now we store this in the nf_bridge_info struct, accessible via skb->nf_bridge. This patch prepares removal of this pointer from skb: Instead of using skb->nf_bridge->x, we use helpers to obtain the in/out device (or ifindexes). Followup patches to netfilter will then allow nf_bridge_info to be obtained by a call into the br_netfilter core, rather than keeping a pointer to it in sk_buff. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 2734977199ca..e1d96bc2767c 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -2,7 +2,7 @@ #define __LINUX_BRIDGE_NETFILTER_H #include - +#include enum nf_br_hook_priorities { NF_BR_PRI_FIRST = INT_MIN, @@ -40,6 +40,27 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) skb_dst_drop(skb); } +static inline int nf_bridge_get_physinif(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physindev->ifindex : 0; +} + +static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physoutdev->ifindex : 0; +} + +static inline struct net_device * +nf_bridge_get_physindev(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physindev : NULL; +} + +static inline struct net_device * +nf_bridge_get_physoutdev(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL; +} #else #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ -- cgit v1.2.3 From 3eaf402502e49ad9c58c73e8599c7c4f345d62da Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:44 +0200 Subject: netfilter: bridge: start splitting mask into public/private chunks ->mask is a bit info field that mixes various use cases. In particular, we have flags that are mutually exlusive, and flags that are only used within br_netfilter while others need to be exposed to other parts of the kernel. Remove BRNF_8021Q/PPPoE flags. They're mutually exclusive and only needed within br_netfilter context. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 4 +--- include/linux/skbuff.h | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index e1d96bc2767c..d47a32dffa15 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -20,12 +20,10 @@ enum nf_br_hook_priorities { #define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 -#define BRNF_8021Q 0x10 -#define BRNF_PPPoE 0x20 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { - if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) + if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) return PPPOE_SES_HLEN; return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f66a089afc41..6f75fb5c6ed7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -166,6 +166,11 @@ struct nf_conntrack { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { atomic_t use; + enum { + BRNF_PROTO_UNCHANGED, + BRNF_PROTO_8021Q, + BRNF_PROTO_PPPOE + } orig_proto; unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; -- cgit v1.2.3 From a1e67951e6c0b11bb11c256f8e1c45ed51fcd760 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:45 +0200 Subject: netfilter: bridge: make BRNF_PKT_TYPE flag a bool nf_bridge_info->mask is used for several things, for example to remember if skb->pkt_type was set to OTHER_HOST. For a bridge, OTHER_HOST is expected case. For ip forward its a non-starter though -- routing expects PACKET_HOST. Bridge netfilter thus changes OTHER_HOST to PACKET_HOST before hook invocation and then un-does it after hook traversal. This information is irrelevant outside of br_netfilter. After this change, ->mask now only contains flags that need to be known outside of br_netfilter in fast-path. Future patch changes mask into a 2bit state field in sk_buff, so that we can remove skb->nf_bridge pointer for good and consider all remaining places that access nf_bridge info content a not-so fastpath. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 1 - include/linux/skbuff.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index d47a32dffa15..8912e8c355fd 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -17,7 +17,6 @@ enum nf_br_hook_priorities { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -#define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f75fb5c6ed7..0991259643d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -171,6 +171,7 @@ struct nf_bridge_info { BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE } orig_proto; + bool pkt_otherhost; unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; -- cgit v1.2.3 From 3bca4cf6022ee2ff1ff2142bed28a6fca00d3a44 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Wed, 8 Apr 2015 12:15:18 +0200 Subject: net: phy: broadcom: Add BCM54616S phy entry Signed-off-by: Alessio Igor Bogani Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 7ccd928cc1f2..1c9920b38fa1 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -11,6 +11,7 @@ #define PHY_ID_BCM5421 0x002060e0 #define PHY_ID_BCM5464 0x002060b0 #define PHY_ID_BCM5461 0x002060c0 +#define PHY_ID_BCM54616S 0x03625d10 #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM7250 0xae025280 -- cgit v1.2.3 From 01a3d796813d6302af9f828f34b73d21a4b96c9a Mon Sep 17 00:00:00 2001 From: Vlad Zolotarov Date: Mon, 30 Mar 2015 21:35:23 +0300 Subject: if_link: Add an additional parameter to ifla_vf_info for RSS querying Add configuration setting for drivers to allow/block an RSS Redirection Table and a Hash Key querying for discrete VFs. On some devices VF share the mentioned above information with PF and querying it may adduce a theoretical security risk. We want to let a system administrator to decide if he/she wants to take this risk or not. Signed-off-by: Vlad Zolotarov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- include/linux/if_link.h | 1 + include/linux/netdevice.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 119130e9298b..da4929927f69 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -14,5 +14,6 @@ struct ifla_vf_info { __u32 linkstate; __u32 min_tx_rate; __u32 max_tx_rate; + __u32 rss_query_en; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf6d9df34d7b..13acb3d8ecdd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -878,6 +878,11 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); * int (*ndo_set_vf_port)(struct net_device *dev, int vf, * struct nlattr *port[]); + * + * Enable or disable the VF ability to query its RSS Redirection Table and + * Hash Key. This is needed since on some devices VF share this information + * with PF and querying it may adduce a theoretical security risk. + * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) * Called to setup 'tc' number of traffic classes in the net device. This @@ -1099,6 +1104,9 @@ struct net_device_ops { struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + int (*ndo_set_vf_rss_query_en)( + struct net_device *dev, + int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, u8 tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); -- cgit v1.2.3 From d8725c86aebaf3516e220760aaf5fefc73825188 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Dec 2014 00:02:50 -0500 Subject: get rid of the size argument of sock_sendmsg() it's equal to iov_iter_count(&msg->msg_iter) in all cases Signed-off-by: Al Viro --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index e74114bcca68..738ea48be889 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -211,7 +211,7 @@ int sock_create(int family, int type, int proto, struct socket **res); int sock_create_kern(int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); void sock_release(struct socket *sock); -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); +int sock_sendmsg(struct socket *sock, struct msghdr *msg); int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname); -- cgit v1.2.3 From 01e97e6517053d7c0b9af5248e944a9209909cf5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 15 Dec 2014 21:39:31 -0500 Subject: new helper: msg_data_left() convert open-coded instances Signed-off-by: Al Viro --- include/linux/socket.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index c9852ef7e317..5bf59c8493b7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -139,6 +139,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } +static inline size_t msg_data_left(struct msghdr *msg) +{ + return iov_iter_count(&msg->msg_iter); +} + /* "Socket"-level control message types: */ #define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */ -- cgit v1.2.3 From e2c6544829f8df396a0a233c86d5ee78f405ffef Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 10 Apr 2015 15:52:37 +0200 Subject: e1000e: Move pm_qos_req to e1000e adapter e1000e is the only driver requiring pm_qos_req, instead of causing every device to waste up to 240 bytes. Allocate it for the specific driver. Signed-off-by: Thomas Graf Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 13acb3d8ecdd..733d24f16cf8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1751,7 +1751,6 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; - struct pm_qos_request pm_qos_req; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 14ffbbb8dab670393a8718adb7b0a385b23fa7bc Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 10 Apr 2015 15:52:38 +0200 Subject: net_device: Reorder members to fill holes Some trivial reorders while preserving the RX/TX cache lines split to fill a couple of holes. Signed-off-by: Thomas Graf Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 733d24f16cf8..b5679aed660b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1313,6 +1313,8 @@ enum netdev_priv_flags { * @base_addr: Device I/O address * @irq: Device IRQ number * + * @carrier_changes: Stats to monitor carrier on<->off transitions + * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices * @napi_list: List entry, that is used for polling napi devices @@ -1346,8 +1348,6 @@ enum netdev_priv_flags { * @tx_dropped: Dropped packets by core network, * do not use this in drivers * - * @carrier_changes: Stats to monitor carrier on<->off transitions - * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, * see for details. @@ -1390,14 +1390,14 @@ enum netdev_priv_flags { * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one - * @uc: unicast mac addresses - * @mc: multicast mac addresses - * @dev_addrs: list of device hw addresses - * @queues_kset: Group of all Kobjects in the Tx and RX queues * @uc_promisc: Counter, that indicates, that promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() + * @uc: unicast mac addresses + * @mc: multicast mac addresses + * @dev_addrs: list of device hw addresses + * @queues_kset: Group of all Kobjects in the Tx and RX queues * @promiscuity: Number of times, the NIC is told to work in * Promiscuous mode, if it becomes 0 the NIC will * exit from working in Promiscuous mode @@ -1427,6 +1427,12 @@ enum netdev_priv_flags { * @ingress_queue: XXX: need comments on this one * @broadcast: hw bcast address * + * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, + * indexed by RX queue number. Assigned by driver. + * This must only be set if the ndo_rx_flow_steer + * operation is defined + * @index_hlist: Device index hash chain + * * @_tx: Array of TX queues * @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time * @real_num_tx_queues: Number of TX queues currently active in device @@ -1436,11 +1442,6 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * - * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, - * indexed by RX queue number. Assigned by driver. - * This must only be set if the ndo_rx_flow_steer - * operation is defined - * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo: Represents the timeout that is used by * the watchdog ( see dev_watchdog() ) @@ -1448,7 +1449,6 @@ enum netdev_priv_flags { * * @pcpu_refcnt: Number of references to this device * @todo_list: Delayed register/unregister - * @index_hlist: Device index hash chain * @link_watch_list: XXX: need comments on this one * * @reg_state: Register/unregister state machine @@ -1515,6 +1515,8 @@ struct net_device { unsigned long base_addr; int irq; + atomic_t carrier_changes; + /* * Some hardware also needs these fields (state,dev_list, * napi_list,unreg_list,close_list) but they are not @@ -1555,8 +1557,6 @@ struct net_device { atomic_long_t rx_dropped; atomic_long_t tx_dropped; - atomic_t carrier_changes; - #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def * wireless_handlers; struct iw_public_data * wireless_data; @@ -1596,6 +1596,8 @@ struct net_device { unsigned short dev_id; unsigned short dev_port; spinlock_t addr_list_lock; + unsigned char name_assign_type; + bool uc_promisc; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; struct netdev_hw_addr_list dev_addrs; @@ -1603,10 +1605,6 @@ struct net_device { #ifdef CONFIG_SYSFS struct kset *queues_kset; #endif - - unsigned char name_assign_type; - - bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; @@ -1653,7 +1651,10 @@ struct net_device { struct netdev_queue __rcu *ingress_queue; unsigned char broadcast[MAX_ADDR_LEN]; - +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rx_cpu_rmap; +#endif + struct hlist_node index_hlist; /* * Cache lines mostly used on transmit path @@ -1664,13 +1665,11 @@ struct net_device { struct Qdisc *qdisc; unsigned long tx_queue_len; spinlock_t tx_global_lock; + int watchdog_timeo; #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps; #endif -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rx_cpu_rmap; -#endif /* These may be needed for future network-power-down code. */ @@ -1680,13 +1679,11 @@ struct net_device { */ unsigned long trans_start; - int watchdog_timeo; struct timer_list watchdog_timer; int __percpu *pcpu_refcnt; struct list_head todo_list; - struct hlist_node index_hlist; struct list_head link_watch_list; enum { NETREG_UNINITIALIZED=0, -- cgit v1.2.3 From 4577139b2dabf58973d59d157aae4ddd3bde863a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 Apr 2015 23:07:54 +0200 Subject: net: use jump label patching for ingress qdisc in __netif_receive_skb_core Even if we make use of classifier and actions from the egress path, we're going into handle_ing() executing additional code on a per-packet cost for ingress qdisc, just to realize that nothing is attached on ingress. Instead, this can just be blinded out as a no-op entirely with the use of a static key. On input fast-path, we already make use of static keys in various places, e.g. skb time stamping, in RPS, etc. It makes sense to not waste time when we're assured that no ingress qdisc is attached anywhere. Enabling/disabling of that code path is being done via two helpers, namely net_{inc,dec}_ingress_queue(), that are being invoked under RTNL mutex when a ingress qdisc is being either initialized or destructed. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5db76a32fcab..2da5d1081ad9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -77,7 +77,20 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) return rtnl_dereference(dev->ingress_queue); } -extern struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); +struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); + +#ifdef CONFIG_NET_CLS_ACT +void net_inc_ingress_queue(void); +void net_dec_ingress_queue(void); +#else +static inline void net_inc_ingress_queue(void) +{ +} + +static inline void net_dec_ingress_queue(void) +{ +} +#endif extern void rtnetlink_init(void); extern void __rtnl_unlock(void); -- cgit v1.2.3 From 24477e57412a7a7dea62637ac990bc5c1cff0665 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 8 Apr 2015 19:41:40 +0200 Subject: uapi: ebtables: don't include linux/if.h linux/if.h creates conflicts in userspace with net/if.h By using it here we force userspace to use linux/if.h while net/if.h may be needed. Note that: include/linux/netfilter_ipv4/ip_tables.h and include/linux/netfilter_ipv6/ip6_tables.h don't include linux/if.h and they also refer to IFNAMSIZ, so they are expecting userspace to include use net/if.h from the client program. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 34e7a2b7f867..f1bd3962e6b6 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -12,9 +12,10 @@ #ifndef __LINUX_BRIDGE_EFF_H #define __LINUX_BRIDGE_EFF_H +#include +#include #include - /* return values for match() functions */ #define EBT_MATCH 0 #define EBT_NOMATCH 1 -- cgit v1.2.3 From 0392d099ab29767b8d7d4d1d29e8020f8abe943f Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 13 Apr 2015 00:52:35 +0200 Subject: netlink: Fix portid type in netlink_notify portid is an unsigned integer. Fix netlink_notify to match all other portid user in the kernel. Signed-off-by: Richard Weinberger Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 02fc86d2348e..6835c1279df7 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -134,7 +134,7 @@ struct netlink_callback { struct netlink_notify { struct net *net; - int portid; + u32 portid; int protocol; }; -- cgit v1.2.3