diff options
Diffstat (limited to 'net')
680 files changed, 39658 insertions, 16793 deletions
diff --git a/net/802/fc.c b/net/802/fc.c index 1e49f2d4ea96..bd345f3d29f8 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -27,6 +27,7 @@ #include <linux/net.h> #include <linux/proc_fs.h> #include <linux/init.h> +#include <linux/export.h> #include <net/arp.h> /* diff --git a/net/802/garp.c b/net/802/garp.c index 16102951d36a..8e21b6db3981 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -15,6 +15,7 @@ #include <linux/rtnetlink.h> #include <linux/llc.h> #include <linux/slab.h> +#include <linux/module.h> #include <net/llc.h> #include <net/llc_pdu.h> #include <net/garp.h> @@ -553,7 +554,7 @@ static void garp_release_port(struct net_device *dev) if (rtnl_dereference(port->applicants[i])) return; } - rcu_assign_pointer(dev->garp_port, NULL); + RCU_INIT_POINTER(dev->garp_port, NULL); kfree_rcu(port, rcu); } @@ -605,7 +606,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl ASSERT_RTNL(); - rcu_assign_pointer(port->applicants[appl->type], NULL); + RCU_INIT_POINTER(port->applicants[appl->type], NULL); /* Delete timer and generate a final TRANSMIT_PDU event to flush out * all pending messages before the applicant is gone. */ diff --git a/net/802/stp.c b/net/802/stp.c index 978c30b1b36b..15540b7323cd 100644 --- a/net/802/stp.c +++ b/net/802/stp.c @@ -12,6 +12,7 @@ #include <linux/etherdevice.h> #include <linux/llc.h> #include <linux/slab.h> +#include <linux/module.h> #include <net/llc.h> #include <net/llc_pdu.h> #include <net/stp.h> @@ -88,9 +89,9 @@ void stp_proto_unregister(const struct stp_proto *proto) { mutex_lock(&stp_proto_mutex); if (is_zero_ether_addr(proto->group_address)) - rcu_assign_pointer(stp_proto, NULL); + RCU_INIT_POINTER(stp_proto, NULL); else - rcu_assign_pointer(garp_protos[proto->group_address[5] - + RCU_INIT_POINTER(garp_protos[proto->group_address[5] - GARP_ADDR_MIN], NULL); synchronize_rcu(); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8970ba139d73..efea35b02e7f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -51,27 +51,6 @@ const char vlan_version[] = DRV_VERSION; /* End of global variables definitions. */ -static void vlan_group_free(struct vlan_group *grp) -{ - int i; - - for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) - kfree(grp->vlan_devices_arrays[i]); - kfree(grp); -} - -static struct vlan_group *vlan_group_alloc(struct net_device *real_dev) -{ - struct vlan_group *grp; - - grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); - if (!grp) - return NULL; - - grp->real_dev = real_dev; - return grp; -} - static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) { struct net_device **array; @@ -92,32 +71,29 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id) return 0; } -static void vlan_rcu_free(struct rcu_head *rcu) -{ - vlan_group_free(container_of(rcu, struct vlan_group, rcu)); -} - void unregister_vlan_dev(struct net_device *dev, struct list_head *head) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; - const struct net_device_ops *ops = real_dev->netdev_ops; + struct vlan_info *vlan_info; struct vlan_group *grp; u16 vlan_id = vlan->vlan_id; ASSERT_RTNL(); - grp = rtnl_dereference(real_dev->vlgrp); - BUG_ON(!grp); + vlan_info = rtnl_dereference(real_dev->vlan_info); + BUG_ON(!vlan_info); + + grp = &vlan_info->grp; /* Take it out of our own structures, but be sure to interlock with * HW accelerating devices or SW vlan input packet processing if * VLAN is not 0 (leave it there for 802.1p). */ - if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER)) - ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); + if (vlan_id) + vlan_vid_del(real_dev, vlan_id); - grp->nr_vlans--; + grp->nr_vlan_devs--; if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); @@ -129,16 +105,9 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) */ unregister_netdevice_queue(dev, head); - /* If the group is now empty, kill off the group. */ - if (grp->nr_vlans == 0) { + if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); - rcu_assign_pointer(real_dev->vlgrp, NULL); - - /* Free the group, after all cpu's are done. */ - call_rcu(&grp->rcu, vlan_rcu_free); - } - /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } @@ -167,21 +136,26 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) int register_vlan_dev(struct net_device *dev) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; - const struct net_device_ops *ops = real_dev->netdev_ops; u16 vlan_id = vlan->vlan_id; - struct vlan_group *grp, *ngrp = NULL; + struct vlan_info *vlan_info; + struct vlan_group *grp; int err; - grp = rtnl_dereference(real_dev->vlgrp); - if (!grp) { - ngrp = grp = vlan_group_alloc(real_dev); - if (!grp) - return -ENOBUFS; + err = vlan_vid_add(real_dev, vlan_id); + if (err) + return err; + + vlan_info = rtnl_dereference(real_dev->vlan_info); + /* vlan_info should be there now. vlan_vid_add took care of it */ + BUG_ON(!vlan_info); + + grp = &vlan_info->grp; + if (grp->nr_vlan_devs == 0) { err = vlan_gvrp_init_applicant(real_dev); if (err < 0) - goto out_free_group; + goto out_vid_del; } err = vlan_group_prealloc_vid(grp, vlan_id); @@ -192,7 +166,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_applicant; - /* Account for reference in struct vlan_dev_info */ + /* Account for reference in struct vlan_dev_priv */ dev_hold(real_dev); netif_stacked_transfer_operstate(real_dev, dev); @@ -202,24 +176,15 @@ int register_vlan_dev(struct net_device *dev) * it into our local structure. */ vlan_group_set_device(grp, vlan_id, dev); - grp->nr_vlans++; - - if (ngrp) { - rcu_assign_pointer(real_dev->vlgrp, ngrp); - } - if (real_dev->features & NETIF_F_HW_VLAN_FILTER) - ops->ndo_vlan_rx_add_vid(real_dev, vlan_id); + grp->nr_vlan_devs++; return 0; out_uninit_applicant: - if (ngrp) + if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); -out_free_group: - if (ngrp) { - /* Free the group, after all cpu's are done. */ - call_rcu(&ngrp->rcu, vlan_rcu_free); - } +out_vid_del: + vlan_vid_del(real_dev, vlan_id); return err; } @@ -267,7 +232,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id); } - new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup); + new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, vlan_setup); if (new_dev == NULL) return -ENOBUFS; @@ -278,10 +243,10 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) */ new_dev->mtu = real_dev->mtu; - vlan_dev_info(new_dev)->vlan_id = vlan_id; - vlan_dev_info(new_dev)->real_dev = real_dev; - vlan_dev_info(new_dev)->dent = NULL; - vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR; + vlan_dev_priv(new_dev)->vlan_id = vlan_id; + vlan_dev_priv(new_dev)->real_dev = real_dev; + vlan_dev_priv(new_dev)->dent = NULL; + vlan_dev_priv(new_dev)->flags = VLAN_FLAG_REORDER_HDR; new_dev->rtnl_link_ops = &vlan_link_ops; err = register_vlan_dev(new_dev); @@ -298,7 +263,7 @@ out_free_newdev: static void vlan_sync_address(struct net_device *dev, struct net_device *vlandev) { - struct vlan_dev_info *vlan = vlan_dev_info(vlandev); + struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); /* May be called without an actual change */ if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr)) @@ -360,25 +325,26 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, { struct net_device *dev = ptr; struct vlan_group *grp; + struct vlan_info *vlan_info; int i, flgs; struct net_device *vlandev; - struct vlan_dev_info *vlan; + struct vlan_dev_priv *vlan; LIST_HEAD(list); if (is_vlan_dev(dev)) __vlan_device_event(dev, event); if ((event == NETDEV_UP) && - (dev->features & NETIF_F_HW_VLAN_FILTER) && - dev->netdev_ops->ndo_vlan_rx_add_vid) { + (dev->features & NETIF_F_HW_VLAN_FILTER)) { pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); - dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0); + vlan_vid_add(dev, 0); } - grp = rtnl_dereference(dev->vlgrp); - if (!grp) + vlan_info = rtnl_dereference(dev->vlan_info); + if (!vlan_info) goto out; + grp = &vlan_info->grp; /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. @@ -447,7 +413,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (!(flgs & IFF_UP)) continue; - vlan = vlan_dev_info(vlandev); + vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) dev_change_flags(vlandev, flgs & ~IFF_UP); netif_stacked_transfer_operstate(dev, vlandev); @@ -465,7 +431,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (flgs & IFF_UP) continue; - vlan = vlan_dev_info(vlandev); + vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) dev_change_flags(vlandev, flgs | IFF_UP); netif_stacked_transfer_operstate(dev, vlandev); @@ -482,9 +448,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (!vlandev) continue; - /* unregistration of last vlan destroys group, abort + /* removal of last vid destroys vlan_info, abort * afterwards */ - if (grp->nr_vlans == 1) + if (vlan_info->nr_vids == 1) i = VLAN_N_VID; unregister_vlan_dev(vlandev, &list); diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 9fd45f3571f9..a4886d94c40c 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -3,6 +3,7 @@ #include <linux/if_vlan.h> #include <linux/u64_stats_sync.h> +#include <linux/list.h> /** @@ -40,8 +41,10 @@ struct vlan_pcpu_stats { u32 tx_dropped; }; +struct netpoll; + /** - * struct vlan_dev_info - VLAN private device data + * struct vlan_dev_priv - VLAN private device data * @nr_ingress_mappings: number of ingress priority mappings * @ingress_priority_map: ingress priority mappings * @nr_egress_mappings: number of egress priority mappings @@ -53,7 +56,7 @@ struct vlan_pcpu_stats { * @dent: proc dir entry * @vlan_pcpu_stats: ptr to percpu rx stats */ -struct vlan_dev_info { +struct vlan_dev_priv { unsigned int nr_ingress_mappings; u32 ingress_priority_map[8]; unsigned int nr_egress_mappings; @@ -67,13 +70,39 @@ struct vlan_dev_info { struct proc_dir_entry *dent; struct vlan_pcpu_stats __percpu *vlan_pcpu_stats; +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *netpoll; +#endif }; -static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) +static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) { return netdev_priv(dev); } +/* if this changes, algorithm will have to be reworked because this + * depends on completely exhausting the VLAN identifier space. Thus + * it gives constant time look-up, but in many cases it wastes memory. + */ +#define VLAN_GROUP_ARRAY_SPLIT_PARTS 8 +#define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS) + +struct vlan_group { + unsigned int nr_vlan_devs; + struct hlist_node hlist; /* linked list */ + struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS]; +}; + +struct vlan_info { + struct net_device *real_dev; /* The ethernet(like) device + * the vlan is attached to. + */ + struct vlan_group grp; + struct list_head vid_list; + unsigned int nr_vids; + struct rcu_head rcu; +}; + static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, u16 vlan_id) { @@ -97,10 +126,10 @@ static inline void vlan_group_set_device(struct vlan_group *vg, static inline struct net_device *vlan_find_dev(struct net_device *real_dev, u16 vlan_id) { - struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp); + struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info); - if (grp) - return vlan_group_get_device(grp, vlan_id); + if (vlan_info) + return vlan_group_get_device(&vlan_info->grp, vlan_id); return NULL; } @@ -121,7 +150,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head); static inline u32 vlan_get_ingress_priority(struct net_device *dev, u16 vlan_tci) { - struct vlan_dev_info *vip = vlan_dev_info(dev); + struct vlan_dev_priv *vip = vlan_dev_priv(dev); return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7]; } diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index f1f2f7bb6661..4d39d802be2c 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -2,9 +2,10 @@ #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/netpoll.h> +#include <linux/export.h> #include "vlan.h" -bool vlan_do_receive(struct sk_buff **skbp) +bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) { struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; @@ -13,7 +14,10 @@ bool vlan_do_receive(struct sk_buff **skbp) vlan_dev = vlan_find_dev(skb->dev, vlan_id); if (!vlan_dev) { - if (vlan_id) + /* Only the last call to vlan_do_receive() should change + * pkt_type to PACKET_OTHERHOST + */ + if (vlan_id && last_handler) skb->pkt_type = PACKET_OTHERHOST; return false; } @@ -32,7 +36,7 @@ bool vlan_do_receive(struct sk_buff **skbp) skb->pkt_type = PACKET_HOST; } - if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { + if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { unsigned int offset = skb->data - skb_mac_header(skb); /* @@ -51,7 +55,7 @@ bool vlan_do_receive(struct sk_buff **skbp) skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; - rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats); + rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; @@ -67,10 +71,10 @@ bool vlan_do_receive(struct sk_buff **skbp) struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id) { - struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp); + struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info); - if (grp) { - return vlan_group_get_device(grp, vlan_id); + if (vlan_info) { + return vlan_group_get_device(&vlan_info->grp, vlan_id); } else { /* * Bonding slaves do not have grp assigned to themselves. @@ -86,13 +90,13 @@ EXPORT_SYMBOL(__vlan_find_dev_deep); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { - return vlan_dev_info(dev)->real_dev; + return vlan_dev_priv(dev)->real_dev; } EXPORT_SYMBOL(vlan_dev_real_dev); u16 vlan_dev_vlan_id(const struct net_device *dev) { - return vlan_dev_info(dev)->vlan_id; + return vlan_dev_priv(dev)->vlan_id; } EXPORT_SYMBOL(vlan_dev_vlan_id); @@ -106,39 +110,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) return skb; } -static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) -{ - __be16 proto; - unsigned char *rawp; - - /* - * Was a VLAN packet, grab the encapsulated protocol, which the layer - * three protocols care about. - */ - - proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= 1536) { - skb->protocol = proto; - return; - } - - rawp = skb->data; - if (*(unsigned short *) rawp == 0xFFFF) - /* - * This is a magic hack to spot IPX packets. Older Novell - * breaks the protocol design and runs IPX over 802.3 without - * an 802.2 LLC layer. We look for FFFF which isn't a used - * 802.2 SSAP/DSAP. This won't work for fault tolerant netware - * but does for the rest. - */ - skb->protocol = htons(ETH_P_802_3); - else - /* - * Real 802.2 LLC - */ - skb->protocol = htons(ETH_P_802_2); -} - struct sk_buff *vlan_untag(struct sk_buff *skb) { struct vlan_hdr *vhdr; @@ -175,3 +146,226 @@ err_free: kfree_skb(skb); return NULL; } + + +/* + * vlan info and vid list + */ + +static void vlan_group_free(struct vlan_group *grp) +{ + int i; + + for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) + kfree(grp->vlan_devices_arrays[i]); +} + +static void vlan_info_free(struct vlan_info *vlan_info) +{ + vlan_group_free(&vlan_info->grp); + kfree(vlan_info); +} + +static void vlan_info_rcu_free(struct rcu_head *rcu) +{ + vlan_info_free(container_of(rcu, struct vlan_info, rcu)); +} + +static struct vlan_info *vlan_info_alloc(struct net_device *dev) +{ + struct vlan_info *vlan_info; + + vlan_info = kzalloc(sizeof(struct vlan_info), GFP_KERNEL); + if (!vlan_info) + return NULL; + + vlan_info->real_dev = dev; + INIT_LIST_HEAD(&vlan_info->vid_list); + return vlan_info; +} + +struct vlan_vid_info { + struct list_head list; + unsigned short vid; + int refcount; +}; + +static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info, + unsigned short vid) +{ + struct vlan_vid_info *vid_info; + + list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (vid_info->vid == vid) + return vid_info; + } + return NULL; +} + +static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid) +{ + struct vlan_vid_info *vid_info; + + vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL); + if (!vid_info) + return NULL; + vid_info->vid = vid; + + return vid_info; +} + +static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid, + struct vlan_vid_info **pvid_info) +{ + struct net_device *dev = vlan_info->real_dev; + const struct net_device_ops *ops = dev->netdev_ops; + struct vlan_vid_info *vid_info; + int err; + + vid_info = vlan_vid_info_alloc(vid); + if (!vid_info) + return -ENOMEM; + + if ((dev->features & NETIF_F_HW_VLAN_FILTER) && + ops->ndo_vlan_rx_add_vid) { + err = ops->ndo_vlan_rx_add_vid(dev, vid); + if (err) { + kfree(vid_info); + return err; + } + } + list_add(&vid_info->list, &vlan_info->vid_list); + vlan_info->nr_vids++; + *pvid_info = vid_info; + return 0; +} + +int vlan_vid_add(struct net_device *dev, unsigned short vid) +{ + struct vlan_info *vlan_info; + struct vlan_vid_info *vid_info; + bool vlan_info_created = false; + int err; + + ASSERT_RTNL(); + + vlan_info = rtnl_dereference(dev->vlan_info); + if (!vlan_info) { + vlan_info = vlan_info_alloc(dev); + if (!vlan_info) + return -ENOMEM; + vlan_info_created = true; + } + vid_info = vlan_vid_info_get(vlan_info, vid); + if (!vid_info) { + err = __vlan_vid_add(vlan_info, vid, &vid_info); + if (err) + goto out_free_vlan_info; + } + vid_info->refcount++; + + if (vlan_info_created) + rcu_assign_pointer(dev->vlan_info, vlan_info); + + return 0; + +out_free_vlan_info: + if (vlan_info_created) + kfree(vlan_info); + return err; +} +EXPORT_SYMBOL(vlan_vid_add); + +static void __vlan_vid_del(struct vlan_info *vlan_info, + struct vlan_vid_info *vid_info) +{ + struct net_device *dev = vlan_info->real_dev; + const struct net_device_ops *ops = dev->netdev_ops; + unsigned short vid = vid_info->vid; + int err; + + if ((dev->features & NETIF_F_HW_VLAN_FILTER) && + ops->ndo_vlan_rx_kill_vid) { + err = ops->ndo_vlan_rx_kill_vid(dev, vid); + if (err) { + pr_warn("failed to kill vid %d for device %s\n", + vid, dev->name); + } + } + list_del(&vid_info->list); + kfree(vid_info); + vlan_info->nr_vids--; +} + +void vlan_vid_del(struct net_device *dev, unsigned short vid) +{ + struct vlan_info *vlan_info; + struct vlan_vid_info *vid_info; + + ASSERT_RTNL(); + + vlan_info = rtnl_dereference(dev->vlan_info); + if (!vlan_info) + return; + + vid_info = vlan_vid_info_get(vlan_info, vid); + if (!vid_info) + return; + vid_info->refcount--; + if (vid_info->refcount == 0) { + __vlan_vid_del(vlan_info, vid_info); + if (vlan_info->nr_vids == 0) { + RCU_INIT_POINTER(dev->vlan_info, NULL); + call_rcu(&vlan_info->rcu, vlan_info_rcu_free); + } + } +} +EXPORT_SYMBOL(vlan_vid_del); + +int vlan_vids_add_by_dev(struct net_device *dev, + const struct net_device *by_dev) +{ + struct vlan_vid_info *vid_info; + struct vlan_info *vlan_info; + int err; + + ASSERT_RTNL(); + + vlan_info = rtnl_dereference(by_dev->vlan_info); + if (!vlan_info) + return 0; + + list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + err = vlan_vid_add(dev, vid_info->vid); + if (err) + goto unwind; + } + return 0; + +unwind: + list_for_each_entry_continue_reverse(vid_info, + &vlan_info->vid_list, + list) { + vlan_vid_del(dev, vid_info->vid); + } + + return err; +} +EXPORT_SYMBOL(vlan_vids_add_by_dev); + +void vlan_vids_del_by_dev(struct net_device *dev, + const struct net_device *by_dev) +{ + struct vlan_vid_info *vid_info; + struct vlan_info *vlan_info; + + ASSERT_RTNL(); + + vlan_info = rtnl_dereference(by_dev->vlan_info); + if (!vlan_info) + return; + + list_for_each_entry(vid_info, &vlan_info->vid_list, list) + vlan_vid_del(dev, vid_info->vid); +} +EXPORT_SYMBOL(vlan_vids_del_by_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9d40a071d038..9988d4abb372 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -33,6 +33,7 @@ #include "vlan.h" #include "vlanproc.h" #include <linux/if_vlan.h> +#include <linux/netpoll.h> /* * Rebuild the Ethernet MAC header. This is called after an ARP @@ -72,7 +73,7 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { struct vlan_priority_tci_mapping *mp; - mp = vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)]; + mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)]; while (mp) { if (mp->priority == skb->priority) { return mp->vlan_qos; /* This should already be shifted @@ -103,10 +104,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, u16 vlan_tci = 0; int rc; - if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) { + if (!(vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR)) { vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); - vlan_tci = vlan_dev_info(dev)->vlan_id; + vlan_tci = vlan_dev_priv(dev)->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); vhdr->h_vlan_TCI = htons(vlan_tci); @@ -129,7 +130,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, saddr = dev->dev_addr; /* Now make the underlying real hard header */ - dev = vlan_dev_info(dev)->real_dev; + dev = vlan_dev_priv(dev)->real_dev; rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen); if (rc > 0) rc += vhdrlen; @@ -149,27 +150,29 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ if (veth->h_vlan_proto != htons(ETH_P_8021Q) || - vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) { + vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; - vlan_tci = vlan_dev_info(dev)->vlan_id; + vlan_tci = vlan_dev_priv(dev)->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb_set_dev(skb, vlan_dev_info(dev)->real_dev); + skb_set_dev(skb, vlan_dev_priv(dev)->real_dev); len = skb->len; + if (netpoll_tx_running(dev)) + return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev); ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vlan_pcpu_stats *stats; - stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats); + stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; u64_stats_update_end(&stats->syncp); } else { - this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped); + this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped); } return ret; @@ -180,7 +183,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) /* TODO: gotta make sure the underlying layer can handle it, * maybe an IFF_VLAN_CAPABLE flag for devices? */ - if (vlan_dev_info(dev)->real_dev->mtu < new_mtu) + if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu) return -ERANGE; dev->mtu = new_mtu; @@ -191,7 +194,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio) vlan->nr_ingress_mappings--; @@ -204,7 +207,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev, int vlan_dev_set_egress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_priority_tci_mapping *mp = NULL; struct vlan_priority_tci_mapping *np; u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; @@ -241,7 +244,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, /* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); u32 old_flags = vlan->flags; if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | @@ -261,12 +264,12 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) { - strncpy(result, vlan_dev_info(dev)->real_dev->name, 23); + strncpy(result, vlan_dev_priv(dev)->real_dev->name, 23); } static int vlan_dev_open(struct net_device *dev) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; int err; @@ -313,7 +316,7 @@ out: static int vlan_dev_stop(struct net_device *dev) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; dev_mc_unsync(real_dev, dev); @@ -332,7 +335,7 @@ static int vlan_dev_stop(struct net_device *dev) static int vlan_dev_set_mac_address(struct net_device *dev, void *p) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; struct sockaddr *addr = p; int err; @@ -358,7 +361,7 @@ out: static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; struct ifreq ifrr; int err = -EOPNOTSUPP; @@ -383,7 +386,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; int err = 0; @@ -397,7 +400,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa) static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid, struct scatterlist *sgl, unsigned int sgc) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; int rc = 0; @@ -409,7 +412,7 @@ static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid, static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; int len = 0; @@ -421,7 +424,7 @@ static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid) static int vlan_dev_fcoe_enable(struct net_device *dev) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; int rc = -EINVAL; @@ -432,7 +435,7 @@ static int vlan_dev_fcoe_enable(struct net_device *dev) static int vlan_dev_fcoe_disable(struct net_device *dev) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; int rc = -EINVAL; @@ -443,7 +446,7 @@ static int vlan_dev_fcoe_disable(struct net_device *dev) static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; int rc = -EINVAL; @@ -455,7 +458,7 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, struct scatterlist *sgl, unsigned int sgc) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; int rc = 0; @@ -468,18 +471,20 @@ static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, static void vlan_dev_change_rx_flags(struct net_device *dev, int change) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - if (change & IFF_ALLMULTI) - dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); + } } static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) { - dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev); - dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev); + dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } /* @@ -517,7 +522,7 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; int subclass = 0; netif_carrier_off(dev); @@ -566,8 +571,8 @@ static int vlan_dev_init(struct net_device *dev) vlan_dev_set_lockdep_class(dev, subclass); - vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats); - if (!vlan_dev_info(dev)->vlan_pcpu_stats) + vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats); + if (!vlan_dev_priv(dev)->vlan_pcpu_stats) return -ENOMEM; return 0; @@ -576,7 +581,7 @@ static int vlan_dev_init(struct net_device *dev) static void vlan_dev_uninit(struct net_device *dev) { struct vlan_priority_tci_mapping *pm; - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); int i; free_percpu(vlan->vlan_pcpu_stats); @@ -589,18 +594,17 @@ static void vlan_dev_uninit(struct net_device *dev) } } -static u32 vlan_dev_fix_features(struct net_device *dev, u32 features) +static netdev_features_t vlan_dev_fix_features(struct net_device *dev, + netdev_features_t features) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; u32 old_features = features; - features &= real_dev->features; features &= real_dev->vlan_features; + features |= NETIF_F_RXCSUM; + features &= real_dev->features; features |= old_features & NETIF_F_SOFT_FEATURES; - - if (dev_ethtool_get_rx_csum(real_dev)) - features |= NETIF_F_RXCSUM; features |= NETIF_F_LLTX; return features; @@ -609,8 +613,9 @@ static u32 vlan_dev_fix_features(struct net_device *dev, u32 features) static int vlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - const struct vlan_dev_info *vlan = vlan_dev_info(dev); - return dev_ethtool_get_settings(vlan->real_dev, cmd); + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + + return __ethtool_get_settings(vlan->real_dev, cmd); } static void vlan_ethtool_get_drvinfo(struct net_device *dev, @@ -624,7 +629,7 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - if (vlan_dev_info(dev)->vlan_pcpu_stats) { + if (vlan_dev_priv(dev)->vlan_pcpu_stats) { struct vlan_pcpu_stats *p; u32 rx_errors = 0, tx_dropped = 0; int i; @@ -633,7 +638,7 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; unsigned int start; - p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i); + p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); do { start = u64_stats_fetch_begin_bh(&p->syncp); rxpackets = p->rx_packets; @@ -658,6 +663,57 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st return stats; } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void vlan_dev_poll_controller(struct net_device *dev) +{ + return; +} + +static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +{ + struct vlan_dev_priv *info = vlan_dev_priv(dev); + struct net_device *real_dev = info->real_dev; + struct netpoll *netpoll; + int err = 0; + + netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); + err = -ENOMEM; + if (!netpoll) + goto out; + + netpoll->dev = real_dev; + strlcpy(netpoll->dev_name, real_dev->name, IFNAMSIZ); + + err = __netpoll_setup(netpoll); + if (err) { + kfree(netpoll); + goto out; + } + + info->netpoll = netpoll; + +out: + return err; +} + +static void vlan_dev_netpoll_cleanup(struct net_device *dev) +{ + struct vlan_dev_priv *info = vlan_dev_priv(dev); + struct netpoll *netpoll = info->netpoll; + + if (!netpoll) + return; + + info->netpoll = NULL; + + /* Wait for transmitting packets to finish before freeing. */ + synchronize_rcu_bh(); + + __netpoll_cleanup(netpoll); + kfree(netpoll); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + static const struct ethtool_ops vlan_ethtool_ops = { .get_settings = vlan_ethtool_get_settings, .get_drvinfo = vlan_ethtool_get_drvinfo, @@ -674,7 +730,6 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = vlan_dev_set_mac_address, .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, @@ -687,6 +742,11 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, #endif +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = vlan_dev_poll_controller, + .ndo_netpoll_setup = vlan_dev_netpoll_setup, + .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, +#endif .ndo_fix_features = vlan_dev_fix_features, }; diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c index 061ceceeef12..6f9755352760 100644 --- a/net/8021q/vlan_gvrp.c +++ b/net/8021q/vlan_gvrp.c @@ -29,7 +29,7 @@ static struct garp_application vlan_gvrp_app __read_mostly = { int vlan_gvrp_request_join(const struct net_device *dev) { - const struct vlan_dev_info *vlan = vlan_dev_info(dev); + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); return garp_request_join(vlan->real_dev, &vlan_gvrp_app, @@ -38,7 +38,7 @@ int vlan_gvrp_request_join(const struct net_device *dev) void vlan_gvrp_request_leave(const struct net_device *dev) { - const struct vlan_dev_info *vlan = vlan_dev_info(dev); + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); __be16 vlan_id = htons(vlan->vlan_id); garp_request_leave(vlan->real_dev, &vlan_gvrp_app, diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index be9a5c19a775..50711368ad6a 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> +#include <linux/module.h> #include <net/net_namespace.h> #include <net/netlink.h> #include <net/rtnetlink.h> @@ -104,7 +105,7 @@ static int vlan_changelink(struct net_device *dev, static int vlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev; int err; @@ -148,7 +149,7 @@ static inline size_t vlan_qos_map_size(unsigned int n) static size_t vlan_get_size(const struct net_device *dev) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); return nla_total_size(2) + /* IFLA_VLAN_ID */ sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ @@ -158,14 +159,14 @@ static size_t vlan_get_size(const struct net_device *dev) static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_priority_tci_mapping *pm; struct ifla_vlan_flags f; struct ifla_vlan_qos_mapping m; struct nlattr *nest; unsigned int i; - NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id); + NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id); if (vlan->flags) { f.flags = vlan->flags; f.mask = ~0; @@ -217,7 +218,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = { .kind = "vlan", .maxtype = IFLA_VLAN_MAX, .policy = vlan_policy, - .priv_size = sizeof(struct vlan_dev_info), + .priv_size = sizeof(struct vlan_dev_priv), .setup = vlan_setup, .validate = vlan_validate, .newlink = vlan_newlink, diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index d34b6daf8930..c718fd3664b6 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -168,13 +168,13 @@ err: int vlan_proc_add_dev(struct net_device *vlandev) { - struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); + struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id); - dev_info->dent = + vlan->dent = proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR, vn->proc_vlan_dir, &vlandev_fops, vlandev); - if (!dev_info->dent) + if (!vlan->dent) return -ENOBUFS; return 0; } @@ -187,10 +187,10 @@ int vlan_proc_rem_dev(struct net_device *vlandev) struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id); /** NOTE: This will consume the memory pointed to by dent, it seems. */ - if (vlan_dev_info(vlandev)->dent) { - remove_proc_entry(vlan_dev_info(vlandev)->dent->name, + if (vlan_dev_priv(vlandev)->dent) { + remove_proc_entry(vlan_dev_priv(vlandev)->dent->name, vn->proc_vlan_dir); - vlan_dev_info(vlandev)->dent = NULL; + vlan_dev_priv(vlandev)->dent = NULL; } return 0; } @@ -268,10 +268,10 @@ static int vlan_seq_show(struct seq_file *seq, void *v) nmtype ? nmtype : "UNKNOWN"); } else { const struct net_device *vlandev = v; - const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); + const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); seq_printf(seq, "%-15s| %d | %s\n", vlandev->name, - dev_info->vlan_id, dev_info->real_dev->name); + vlan->vlan_id, vlan->real_dev->name); } return 0; } @@ -279,7 +279,7 @@ static int vlan_seq_show(struct seq_file *seq, void *v) static int vlandev_seq_show(struct seq_file *seq, void *offset) { struct net_device *vlandev = (struct net_device *) seq->private; - const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); + const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; static const char fmt64[] = "%30s %12llu\n"; @@ -291,8 +291,8 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) stats = dev_get_stats(vlandev, &temp); seq_printf(seq, "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n", - vlandev->name, dev_info->vlan_id, - (int)(dev_info->flags & 1), vlandev->priv_flags); + vlandev->name, vlan->vlan_id, + (int)(vlan->flags & 1), vlandev->priv_flags); seq_printf(seq, fmt64, "total frames received", stats->rx_packets); seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes); @@ -300,23 +300,23 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) seq_puts(seq, "\n"); seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets); seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes); - seq_printf(seq, "Device: %s", dev_info->real_dev->name); + seq_printf(seq, "Device: %s", vlan->real_dev->name); /* now show all PRIORITY mappings relating to this VLAN */ seq_printf(seq, "\nINGRESS priority mappings: " "0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n", - dev_info->ingress_priority_map[0], - dev_info->ingress_priority_map[1], - dev_info->ingress_priority_map[2], - dev_info->ingress_priority_map[3], - dev_info->ingress_priority_map[4], - dev_info->ingress_priority_map[5], - dev_info->ingress_priority_map[6], - dev_info->ingress_priority_map[7]); + vlan->ingress_priority_map[0], + vlan->ingress_priority_map[1], + vlan->ingress_priority_map[2], + vlan->ingress_priority_map[3], + vlan->ingress_priority_map[4], + vlan->ingress_priority_map[5], + vlan->ingress_priority_map[6], + vlan->ingress_priority_map[7]); seq_printf(seq, " EGRESS priority mappings: "); for (i = 0; i < 16; i++) { const struct vlan_priority_tci_mapping *mp - = dev_info->egress_priority_map[i]; + = vlan->egress_priority_map[i]; while (mp) { seq_printf(seq, "%u:%hu ", mp->priority, ((mp->vlan_qos >> 13) & 0x7)); diff --git a/net/9p/client.c b/net/9p/client.c index 0505a03c374c..776618cd2be5 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -23,6 +23,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> @@ -38,6 +40,9 @@ #include <net/9p/transport.h> #include "protocol.h" +#define CREATE_TRACE_POINTS +#include <trace/events/9p.h> + /* * Client Option Parsing (code inspired by NFS code) * - a little lazy - parse all client options @@ -78,15 +83,15 @@ static int get_protocol_version(char *s) if (!strcmp(s, "9p2000")) { version = p9_proto_legacy; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); + p9_debug(P9_DEBUG_9P, "Protocol version: Legacy\n"); } else if (!strcmp(s, "9p2000.u")) { version = p9_proto_2000u; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); + p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); } else if (!strcmp(s, "9p2000.L")) { version = p9_proto_2000L; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); + p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); } else - printk(KERN_INFO "9p: Unknown protocol version %s.\n", s); + pr_info("Unknown protocol version %s\n", s); return version; } @@ -116,43 +121,40 @@ static int parse_opts(char *opts, struct p9_client *clnt) tmp_options = kstrdup(opts, GFP_KERNEL); if (!tmp_options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { - int token; + int token, r; if (!*p) continue; token = match_token(p, tokens, args); - if (token < Opt_trans) { - int r = match_int(&args[0], &option); + switch (token) { + case Opt_msize: + r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); ret = r; continue; } - } - switch (token) { - case Opt_msize: clnt->msize = option; break; case Opt_trans: s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; - P9_DPRINTK(P9_DEBUG_ERROR, - "problem allocating copy of trans arg\n"); + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of trans arg\n"); goto free_and_return; } clnt->trans_mod = v9fs_get_trans_by_name(s); if (clnt->trans_mod == NULL) { - printk(KERN_INFO - "9p: Could not find " - "request transport: %s\n", s); + pr_info("Could not find request transport: %s\n", + s); ret = -EINVAL; kfree(s); goto free_and_return; @@ -166,8 +168,8 @@ static int parse_opts(char *opts, struct p9_client *clnt) s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; - P9_DPRINTK(P9_DEBUG_ERROR, - "problem allocating copy of version arg\n"); + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of version arg\n"); goto free_and_return; } ret = get_protocol_version(s); @@ -203,11 +205,13 @@ free_and_return: * */ -static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +static struct p9_req_t * +p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) { unsigned long flags; int row, col; struct p9_req_t *req; + int alloc_msize = min(c->msize, max_size); /* This looks up the original request by tag so we know which * buffer to read the data into */ @@ -222,7 +226,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) sizeof(struct p9_req_t), GFP_ATOMIC); if (!c->reqs[row]) { - printk(KERN_ERR "Couldn't grow tag array\n"); + pr_err("Couldn't grow tag array\n"); spin_unlock_irqrestore(&c->lock, flags); return ERR_PTR(-ENOMEM); } @@ -241,29 +245,16 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) if (!req->tc) { req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); if (!req->wq) { - printk(KERN_ERR "Couldn't grow tag array\n"); + pr_err("Couldn't grow tag array\n"); return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); - if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - int alloc_msize = min(c->msize, 4096); - req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->tc->capacity = alloc_msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->rc->capacity = alloc_msize; - } else { - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->tc->capacity = c->msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->rc->capacity = c->msize; - } + req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); + req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); if ((!req->tc) || (!req->rc)) { - printk(KERN_ERR "Couldn't grow tag array\n"); + pr_err("Couldn't grow tag array\n"); kfree(req->tc); kfree(req->rc); kfree(req->wq); @@ -271,6 +262,8 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) req->wq = NULL; return ERR_PTR(-ENOMEM); } + req->tc->capacity = alloc_msize; + req->rc->capacity = alloc_msize; req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); } @@ -351,9 +344,9 @@ static void p9_tag_cleanup(struct p9_client *c) for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { for (col = 0; col < P9_ROW_MAXTAG; col++) { if (c->reqs[row][col].status != REQ_STATUS_IDLE) { - P9_DPRINTK(P9_DEBUG_MUX, - "Attempting to cleanup non-free tag %d,%d\n", - row, col); + p9_debug(P9_DEBUG_MUX, + "Attempting to cleanup non-free tag %d,%d\n", + row, col); /* TODO: delay execution of cleanup */ return; } @@ -387,7 +380,7 @@ static void p9_tag_cleanup(struct p9_client *c) static void p9_free_req(struct p9_client *c, struct p9_req_t *r) { int tag = r->tc->tag; - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); + p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); r->status = REQ_STATUS_IDLE; if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) @@ -402,9 +395,9 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) */ void p9_client_cb(struct p9_client *c, struct p9_req_t *req) { - P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); wake_up(req->wq); - P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); + p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); } EXPORT_SYMBOL(p9_client_cb); @@ -439,8 +432,8 @@ p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, pdu->id = r_type; pdu->tag = r_tag; - P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size, - pdu->id, pdu->tag); + p9_debug(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", + pdu->size, pdu->id, pdu->tag); if (type) *type = r_type; @@ -475,37 +468,22 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) int ecode; err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after check errors which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); if (err) { - P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; } - if (type != P9_RERROR && type != P9_RLERROR) return 0; if (!p9_is_proto_dotl(c)) { char *ename; - - if (req->tc->pbuf_size) { - /* Handle user buffers */ - size_t len = req->rc->size - req->rc->offset; - if (req->tc->pubuf) { - /* User Buffer */ - err = copy_from_user( - &req->rc->sdata[req->rc->offset], - req->tc->pubuf, len); - if (err) { - err = -EFAULT; - goto out_err; - } - } else { - /* Kernel Buffer */ - memmove(&req->rc->sdata[req->rc->offset], - req->tc->pkbuf, len); - } - } err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); + &ename, &ecode); if (err) goto out_err; @@ -515,24 +493,131 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!err || !IS_ERR_VALUE(err)) { err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, - ename); - - kfree(ename); + p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); } + kfree(ename); } else { err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); err = -ecode; - P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); } - return err; out_err: - P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + + return err; +} + +/** + * p9_check_zc_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * @in_hdrlen: Size of response protocol buffer. + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, + char *uidata, int in_hdrlen, int kern_buf) +{ + int err; + int ecode; + int8_t type; + char *ename = NULL; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after parse_header which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); + if (err) { + p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + + if (type != P9_RERROR && type != P9_RLERROR) + return 0; + + if (!p9_is_proto_dotl(c)) { + /* Error is reported in string format */ + uint16_t len; + /* 7 = header size for RERROR, 2 is the size of string len; */ + int inline_len = in_hdrlen - (7 + 2); + + /* Read the size of error string */ + err = p9pdu_readf(req->rc, c->proto_version, "w", &len); + if (err) + goto out_err; + + ename = kmalloc(len + 1, GFP_NOFS); + if (!ename) { + err = -ENOMEM; + goto out_err; + } + if (len <= inline_len) { + /* We have error in protocol buffer itself */ + if (pdu_read(req->rc, ename, len)) { + err = -EFAULT; + goto out_free; + + } + } else { + /* + * Part of the data is in user space buffer. + */ + if (pdu_read(req->rc, ename, inline_len)) { + err = -EFAULT; + goto out_free; + + } + if (kern_buf) { + memcpy(ename + inline_len, uidata, + len - inline_len); + } else { + err = copy_from_user(ename + inline_len, + uidata, len - inline_len); + if (err) { + err = -EFAULT; + goto out_free; + } + } + } + ename[len] = 0; + if (p9_is_proto_dotu(c)) { + /* For dotu we also have error code */ + err = p9pdu_readf(req->rc, + c->proto_version, "d", &ecode); + if (err) + goto out_free; + err = -ecode; + } + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); + } + kfree(ename); + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } + return err; + +out_free: + kfree(ename); +out_err: + p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; } @@ -561,7 +646,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (err) return err; - P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); + p9_debug(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag); if (IS_ERR(req)) @@ -579,25 +664,14 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) return 0; } -/** - * p9_client_rpc - issue a request and wait for a response - * @c: client session - * @type: type of request - * @fmt: protocol format string (see protocol.c) - * - * Returns request structure (which client must free using p9_free_req) - */ - -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, + int8_t type, int req_size, + const char *fmt, va_list ap) { - va_list ap; int tag, err; struct p9_req_t *req; - unsigned long flags; - int sigpending; - P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); + p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type); /* we allow for any status other than disconnected */ if (c->status == Disconnected) @@ -607,12 +681,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) return ERR_PTR(-EIO); - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } else - sigpending = 0; - tag = P9_NOTAG; if (type != P9_TVERSION) { tag = p9_idpool_get(c->tagpool); @@ -620,18 +688,51 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) return ERR_PTR(-ENOMEM); } - req = p9_tag_alloc(c, tag); + req = p9_tag_alloc(c, tag, req_size); if (IS_ERR(req)) return req; /* marshall the data */ p9pdu_prepare(req->tc, tag, type); - va_start(ap, fmt); err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); - va_end(ap); if (err) goto reterr; - p9pdu_finalize(req->tc); + p9pdu_finalize(c, req->tc); + trace_9p_client_req(c, type, tag); + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + req = p9_client_prepare_req(c, type, c->msize, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; err = c->trans_mod->request(c, req); if (err < 0) { @@ -639,20 +740,16 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) c->status = Disconnected; goto reterr; } - - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); + /* Wait for the response */ err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", - req->wq, tag, err); + req->status >= REQ_STATUS_RCVD); if (req->status == REQ_STATUS_ERROR) { - P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } - if ((err == -ERESTARTSYS) && (c->status == Connected)) { - P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); @@ -663,25 +760,102 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if (req->status == REQ_STATUS_RCVD) err = 0; } - if (sigpending) { spin_lock_irqsave(¤t->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - if (err < 0) goto reterr; err = p9_check_errors(c, req); - if (!err) { - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_zc_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ +static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, + char *uidata, char *uodata, + int inlen, int olen, int in_hdrlen, + int kern_buf, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + /* + * We allocate a inline protocol data of only 4k bytes. + * The actual content is passed in zero-copy fashion. + */ + req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + /* If we are called with KERNEL_DS force kern_buf */ + if (segment_eq(get_fs(), KERNEL_DS)) + kern_buf = 1; + + err = c->trans_mod->zc_request(c, req, uidata, uodata, + inlen, olen, in_hdrlen, kern_buf); + if (err < 0) { + if (err == -EIO) + c->status = Disconnected; + goto reterr; + } + if (req->status == REQ_STATUS_ERROR) { + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } + if ((err == -ERESTARTSYS) && (c->status == Connected)) { + p9_debug(P9_DEBUG_MUX, "flushing\n"); + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; } + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + if (err < 0) + goto reterr; + err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; reterr: - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, - err); p9_free_req(c, req); return ERR_PTR(err); } @@ -692,7 +866,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) struct p9_fid *fid; unsigned long flags; - P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt); fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); if (!fid) return ERR_PTR(-ENOMEM); @@ -725,7 +899,7 @@ static void p9_fid_destroy(struct p9_fid *fid) struct p9_client *clnt; unsigned long flags; - P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); + p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid); clnt = fid->clnt; p9_idpool_put(fid->fid, clnt->fidpool); spin_lock_irqsave(&clnt->lock, flags); @@ -742,8 +916,8 @@ static int p9_client_version(struct p9_client *c) char *version; int msize; - P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", - c->msize, c->proto_version); + p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", + c->msize, c->proto_version); switch (c->proto_version) { case p9_proto_2000L: @@ -768,12 +942,12 @@ static int p9_client_version(struct p9_client *c) err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { - P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); - P9_DUMP_PKT(1, req->rc); + p9_debug(P9_DEBUG_9P, "version error %d\n", err); + trace_9p_protocol_dump(c, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); + p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); if (!strncmp(version, "9P2000.L", 8)) c->proto_version = p9_proto_2000L; else if (!strncmp(version, "9P2000.u", 8)) @@ -823,8 +997,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (clnt->trans_mod == NULL) { err = -EPROTONOSUPPORT; - P9_DPRINTK(P9_DEBUG_ERROR, - "No transport defined or default transport\n"); + p9_debug(P9_DEBUG_ERROR, + "No transport defined or default transport\n"); goto destroy_tagpool; } @@ -834,8 +1008,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto put_trans; } - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", - clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); + p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", + clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); err = clnt->trans_mod->create(clnt, dev_name, options); if (err) @@ -868,7 +1042,7 @@ void p9_client_destroy(struct p9_client *clnt) { struct p9_fid *fid, *fidptr; - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_MUX, "clnt %p\n", clnt); if (clnt->trans_mod) clnt->trans_mod->close(clnt); @@ -876,7 +1050,7 @@ void p9_client_destroy(struct p9_client *clnt) v9fs_put_trans(clnt->trans_mod); list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) { - printk(KERN_INFO "Found fid %d not clunked\n", fid->fid); + pr_info("Found fid %d not clunked\n", fid->fid); p9_fid_destroy(fid); } @@ -891,14 +1065,14 @@ EXPORT_SYMBOL(p9_client_destroy); void p9_client_disconnect(struct p9_client *clnt) { - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); clnt->status = Disconnected; } EXPORT_SYMBOL(p9_client_disconnect); void p9_client_begin_disconnect(struct p9_client *clnt) { - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); clnt->status = BeginDisconnect; } EXPORT_SYMBOL(p9_client_begin_disconnect); @@ -906,15 +1080,14 @@ EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname) { - int err; + int err = 0; struct p9_req_t *req; struct p9_fid *fid; struct p9_qid qid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", - afid ? afid->fid : -1, uname, aname); - err = 0; + p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); if (IS_ERR(fid)) { err = PTR_ERR(fid); @@ -931,15 +1104,13 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, - (unsigned long long)qid.path, - qid.version); + p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", + qid.type, (unsigned long long)qid.path, qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -979,8 +1150,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, fid = oldfid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", - oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); + p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", + oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, nwname, wnames); @@ -991,13 +1162,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } p9_free_req(clnt, req); - P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); + p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); if (nwqids != nwname) { err = -ENOENT; @@ -1005,7 +1176,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, } for (count = 0; count < nwqids; count++) - P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", + p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", count, wqids[count].type, (unsigned long long)wqids[count].path, wqids[count].version); @@ -1040,7 +1211,7 @@ int p9_client_open(struct p9_fid *fid, int mode) int iounit; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", + p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); err = 0; @@ -1058,11 +1229,11 @@ int p9_client_open(struct p9_fid *fid, int mode) err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", + p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type, (unsigned long long)qid.path, qid.version, iounit); @@ -1084,7 +1255,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, struct p9_req_t *req; int iounit; - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", ofid->fid, name, flags, mode, gid); clnt = ofid->clnt; @@ -1101,11 +1272,11 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", + p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", qid->type, (unsigned long long)qid->path, qid->version, iounit); @@ -1129,7 +1300,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, struct p9_qid qid; int iounit; - P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", + p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", fid->fid, name, perm, mode); err = 0; clnt = fid->clnt; @@ -1146,11 +1317,11 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", + p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", qid.type, (unsigned long long)qid.path, qid.version, iounit); @@ -1172,7 +1343,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", + p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", dfid->fid, name, symtgt); clnt = dfid->clnt; @@ -1185,11 +1356,11 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", + p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", qid->type, (unsigned long long)qid->path, qid->version); free_and_error: @@ -1204,7 +1375,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", + p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", dfid->fid, oldfid->fid, newname); clnt = dfid->clnt; req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, @@ -1212,7 +1383,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) if (IS_ERR(req)) return PTR_ERR(req); - P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n"); + p9_debug(P9_DEBUG_9P, "<<< RLINK\n"); p9_free_req(clnt, req); return 0; } @@ -1224,7 +1395,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", + p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", fid->fid, datasync); err = 0; clnt = fid->clnt; @@ -1235,7 +1406,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); p9_free_req(clnt, req); @@ -1251,12 +1422,13 @@ int p9_client_clunk(struct p9_fid *fid) struct p9_req_t *req; if (!fid) { - P9_EPRINTK(KERN_WARNING, "Trying to clunk with NULL fid\n"); + pr_warn("%s (%d): Trying to clunk with NULL fid\n", + __func__, task_pid_nr(current)); dump_stack(); return 0; } - P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); err = 0; clnt = fid->clnt; @@ -1266,7 +1438,7 @@ int p9_client_clunk(struct p9_fid *fid) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1284,7 +1456,7 @@ int p9_client_remove(struct p9_fid *fid) struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); err = 0; clnt = fid->clnt; @@ -1294,7 +1466,7 @@ int p9_client_remove(struct p9_fid *fid) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1309,7 +1481,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) struct p9_req_t *req; struct p9_client *clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", + p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", dfid->fid, name, flags); clnt = dfid->clnt; @@ -1318,7 +1490,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) err = PTR_ERR(req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); + p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); p9_free_req(clnt, req); error: @@ -1330,13 +1502,15 @@ int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count) { - int err, rsize; - struct p9_client *clnt; - struct p9_req_t *req; char *dataptr; + int kernel_buf = 0; + struct p9_req_t *req; + struct p9_client *clnt; + int err, rsize, non_zc = 0; + - P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, - (long long unsigned) offset, count); + p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", + fid->fid, (long long unsigned) offset, count); err = 0; clnt = fid->clnt; @@ -1348,13 +1522,24 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, rsize = count; /* Don't bother zerocopy for small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, - rsize, data, udata); + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *indata; + if (data) { + kernel_buf = 1; + indata = data; + } else + indata = (char *)udata; + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, + 11, kernel_buf, "dqd", fid->fid, + offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, - rsize); + rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1363,14 +1548,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - P9_DUMP_PKT(1, req->rc); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (!req->tc->pbuf_size) { + if (non_zc) { if (data) { memmove(data, dataptr, count); } else { @@ -1396,10 +1580,11 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, u64 offset, u32 count) { int err, rsize; + int kernel_buf = 0; struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", fid->fid, (long long unsigned) offset, count); err = 0; clnt = fid->clnt; @@ -1411,19 +1596,24 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - /* Don't bother zerocopy form small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, - rsize, data, udata); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *odata; + if (data) { + kernel_buf = 1; + odata = data; + } else + odata = (char *)udata; + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, + P9_ZC_HDR_SZ, kernel_buf, "dqd", + fid->fid, offset, rsize); } else { - if (data) req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, - offset, rsize, data); + offset, rsize, data); else req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, - offset, rsize, udata); + offset, rsize, udata); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1432,11 +1622,11 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); p9_free_req(clnt, req); return count; @@ -1456,7 +1646,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) struct p9_req_t *req; u16 ignored; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); if (!ret) return ERR_PTR(-ENOMEM); @@ -1472,12 +1662,12 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" @@ -1506,7 +1696,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, GFP_KERNEL); struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", + p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", fid->fid, request_mask); if (!ret) @@ -1523,12 +1713,12 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, "<<< RGETATTR st_result_mask=%lld\n" "<<< qid=%x.%llx.%x\n" "<<< st_mode=%8.8x st_nlink=%llu\n" @@ -1594,8 +1784,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) err = 0; clnt = fid->clnt; wst->size = p9_client_statsize(wst, clnt->proto_version); - P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" @@ -1612,7 +1802,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1628,8 +1818,8 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n" " atime_sec=%lld atime_nsec=%lld\n" " mtime_sec=%lld mtime_nsec=%lld\n", @@ -1643,7 +1833,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) err = PTR_ERR(req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); p9_free_req(clnt, req); error: return err; @@ -1659,7 +1849,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid); if (IS_ERR(req)) { @@ -1671,12 +1861,12 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " + p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " "blocks %llu bfree %llu bavail %llu files %llu ffree %llu " "fsid %llu namelen %ld\n", fid->fid, (long unsigned int)sb->type, (long int)sb->bsize, @@ -1699,7 +1889,7 @@ int p9_client_rename(struct p9_fid *fid, err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", + p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", fid->fid, newdirfid->fid, name); req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid, @@ -1709,7 +1899,7 @@ int p9_client_rename(struct p9_fid *fid, goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1727,7 +1917,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, err = 0; clnt = olddirfid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" + p9_debug(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" " newdirfid %d new name %s\n", olddirfid->fid, old_name, newdirfid->fid, new_name); @@ -1738,7 +1928,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", + p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", newdirfid->fid, new_name); p9_free_req(clnt, req); @@ -1766,7 +1956,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, attr_fid = NULL; goto error; } - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", file_fid->fid, attr_fid->fid, attr_name); @@ -1778,12 +1968,12 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, } err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } p9_free_req(clnt, req); - P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", + p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", attr_fid->fid, *attr_size); return attr_fid; clunk_fid: @@ -1804,7 +1994,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name, struct p9_req_t *req; struct p9_client *clnt; - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n", fid->fid, name, (long long)attr_size, flags); err = 0; @@ -1815,7 +2005,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name, err = PTR_ERR(req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); p9_free_req(clnt, req); error: return err; @@ -1824,12 +2014,12 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { - int err, rsize; + int err, rsize, non_zc = 0; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", fid->fid, (long long unsigned) offset, count); err = 0; @@ -1842,13 +2032,18 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, - offset, rsize, data); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, + 11, 1, "dqd", fid->fid, offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, - offset, rsize); + offset, rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1857,13 +2052,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (!req->tc->pbuf_size && data) + if (non_zc) memmove(data, dataptr, count); p9_free_req(clnt, req); @@ -1885,7 +2080,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " + p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev), gid); @@ -1894,10 +2089,10 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, + p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, (unsigned long long)qid->path, qid->version); error: @@ -1916,7 +2111,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", + p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", fid->fid, name, mode, gid); req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, gid); @@ -1925,10 +2120,10 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, + p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, (unsigned long long)qid->path, qid->version); error: @@ -1946,7 +2141,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " + p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " "start %lld length %lld proc_id %d client_id %s\n", fid->fid, flock->type, flock->flags, flock->start, flock->length, flock->proc_id, flock->client_id); @@ -1960,10 +2155,10 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); + p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); error: p9_free_req(clnt, req); return err; @@ -1979,7 +2174,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " + p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " "length %lld proc_id %d client_id %s\n", fid->fid, glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); @@ -1993,10 +2188,10 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) &glock->start, &glock->length, &glock->proc_id, &glock->client_id); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " + p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " "proc_id %d client_id %s\n", glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); error: @@ -2013,7 +2208,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid); if (IS_ERR(req)) @@ -2021,10 +2216,10 @@ int p9_client_readlink(struct p9_fid *fid, char **target) err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); if (err) { - P9_DUMP_PKT(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); + p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); error: p9_free_req(clnt, req); return err; diff --git a/net/9p/error.c b/net/9p/error.c index 52518512a93e..2ab2de76010f 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -27,6 +27,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/list.h> #include <linux/jhash.h> @@ -237,8 +239,8 @@ int p9_errstr2errno(char *errstr, int len) if (errno == 0) { /* TODO: if error isn't found, add it dynamically */ errstr[len] = 0; - printk(KERN_ERR "%s: server reported unknown error %s\n", - __func__, errstr); + pr_err("%s: server reported unknown error %s\n", + __func__, errstr); errno = ESERVERFAULT; } diff --git a/net/9p/mod.c b/net/9p/mod.c index 2664d1292291..6ab36aea7727 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -24,7 +24,11 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> #include <linux/moduleparam.h> #include <net/9p/9p.h> #include <linux/fs.h> @@ -39,6 +43,29 @@ unsigned int p9_debug_level = 0; /* feature-rific global debug level */ EXPORT_SYMBOL(p9_debug_level); module_param_named(debug, p9_debug_level, uint, 0); MODULE_PARM_DESC(debug, "9P debugging level"); + +void _p9_debug(enum p9_debug_flags level, const char *func, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if ((p9_debug_level & level) != level) + return; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (level == P9_DEBUG_9P) + pr_notice("(%8.8d) %pV", task_pid_nr(current), &vaf); + else + pr_notice("-- %s (%d): %pV", func, task_pid_nr(current), &vaf); + + va_end(args); +} +EXPORT_SYMBOL(_p9_debug); #endif /* @@ -147,7 +174,7 @@ static int __init init_p9(void) int ret = 0; p9_error_init(); - printk(KERN_INFO "Installing 9P2000 support\n"); + pr_info("Installing 9P2000 support\n"); p9_trans_fd_init(); return ret; @@ -160,7 +187,7 @@ static int __init init_p9(void) static void __exit exit_p9(void) { - printk(KERN_INFO "Unloading 9P2000 support\n"); + pr_info("Unloading 9P2000 support\n"); p9_trans_fd_exit(); } diff --git a/net/9p/protocol.c b/net/9p/protocol.c index df58375ea6b3..9ee48cb30179 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -37,40 +37,11 @@ #include <net/9p/client.h> #include "protocol.h" +#include <trace/events/9p.h> + static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); -#ifdef CONFIG_NET_9P_DEBUG -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ - int len = pdu->size; - - if ((p9_debug_level & P9_DEBUG_VPKT) != P9_DEBUG_VPKT) { - if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) { - if (len > 32) - len = 32; - } else { - /* shouldn't happen */ - return; - } - } - - if (way) - print_hex_dump_bytes("[9P] ", DUMP_PREFIX_OFFSET, pdu->sdata, - len); - else - print_hex_dump_bytes("]9P[ ", DUMP_PREFIX_OFFSET, pdu->sdata, - len); -} -#else -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ -} -#endif -EXPORT_SYMBOL(p9pdu_dump); - void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); @@ -81,7 +52,7 @@ void p9stat_free(struct p9_wstat *stbuf) } EXPORT_SYMBOL(p9stat_free); -static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { size_t len = min(pdu->size - pdu->offset, size); memcpy(data, &pdu->sdata[pdu->offset], len); @@ -108,26 +79,6 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) return size - len; } -static size_t -pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, - size_t size) -{ - BUG_ON(pdu->size > P9_IOHDRSZ); - pdu->pubuf = (char __user *)udata; - pdu->pkbuf = (char *)kdata; - pdu->pbuf_size = size; - return 0; -} - -static size_t -pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) -{ - BUG_ON(pdu->size > P9_READDIRHDRSZ); - pdu->pkbuf = (char *)kdata; - pdu->pbuf_size = size; - return 0; -} - /* b - int8_t w - int16_t @@ -459,26 +410,6 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; - case 'E':{ - int32_t cnt = va_arg(ap, int32_t); - const char *k = va_arg(ap, const void *); - const char __user *u = va_arg(ap, - const void __user *); - errcode = p9pdu_writef(pdu, proto_version, "d", - cnt); - if (!errcode && pdu_write_urw(pdu, k, u, cnt)) - errcode = -EFAULT; - } - break; - case 'F':{ - int32_t cnt = va_arg(ap, int32_t); - const char *k = va_arg(ap, const void *); - errcode = p9pdu_writef(pdu, proto_version, "d", - cnt); - if (!errcode && pdu_write_readdir(pdu, k, cnt)) - errcode = -EFAULT; - } - break; case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = @@ -591,7 +522,7 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) return ret; } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) +int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) { struct p9_fcall fake_pdu; int ret; @@ -601,10 +532,10 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st); if (ret) { - P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); - P9_DUMP_PKT(0, &fake_pdu); + p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); + trace_9p_protocol_dump(clnt, &fake_pdu); } return ret; @@ -617,7 +548,7 @@ int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } -int p9pdu_finalize(struct p9_fcall *pdu) +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu) { int size = pdu->size; int err; @@ -626,9 +557,9 @@ int p9pdu_finalize(struct p9_fcall *pdu) err = p9pdu_writef(pdu, 0, "d", size); pdu->size = size; - P9_DUMP_PKT(0, pdu); - P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, - pdu->id, pdu->tag); + trace_9p_protocol_dump(clnt, pdu); + p9_debug(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", + pdu->size, pdu->id, pdu->tag); return err; } @@ -637,14 +568,10 @@ void p9pdu_reset(struct p9_fcall *pdu) { pdu->offset = 0; pdu->size = 0; - pdu->private = NULL; - pdu->pubuf = NULL; - pdu->pkbuf = NULL; - pdu->pbuf_size = 0; } -int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, - int proto_version) +int p9dirent_read(struct p9_client *clnt, char *buf, int len, + struct p9_dirent *dirent) { struct p9_fcall fake_pdu; int ret; @@ -655,11 +582,11 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid, - &dirent->d_off, &dirent->d_type, &nameptr); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid, + &dirent->d_off, &dirent->d_type, &nameptr); if (ret) { - P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); - P9_DUMP_PKT(1, &fake_pdu); + p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); + trace_9p_protocol_dump(clnt, &fake_pdu); goto out; } diff --git a/net/9p/protocol.h b/net/9p/protocol.h index 2431c0f38d56..2cc525fa49fa 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -29,6 +29,6 @@ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_list ap); int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); -int p9pdu_finalize(struct p9_fcall *pdu); -void p9pdu_dump(int, struct p9_fcall *); +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu); void p9pdu_reset(struct p9_fcall *pdu); +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size); diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 9a70ebdec56e..de8df957867d 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -21,30 +21,25 @@ /** * p9_release_req_pages - Release pages after the transaction. - * @*private: PDU's private page of struct trans_rpage_info */ -void -p9_release_req_pages(struct trans_rpage_info *rpinfo) +void p9_release_pages(struct page **pages, int nr_pages) { int i = 0; - - while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { - put_page(rpinfo->rp_data[i]); + while (pages[i] && nr_pages--) { + put_page(pages[i]); i++; } } -EXPORT_SYMBOL(p9_release_req_pages); +EXPORT_SYMBOL(p9_release_pages); /** * p9_nr_pages - Return number of pages needed to accommodate the payload. */ -int -p9_nr_pages(struct p9_req_t *req) +int p9_nr_pages(char *data, int len) { unsigned long start_page, end_page; - start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT; - end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size + - PAGE_SIZE - 1) >> PAGE_SHIFT; + start_page = (unsigned long)data >> PAGE_SHIFT; + end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT; return end_page - start_page; } EXPORT_SYMBOL(p9_nr_pages); @@ -58,35 +53,17 @@ EXPORT_SYMBOL(p9_nr_pages); * @nr_pages: number of pages to accommodate the payload * @rw: Indicates if the pages are for read or write. */ -int -p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, - int nr_pages, u8 rw) -{ - uint32_t first_page_bytes = 0; - int32_t pdata_mapped_pages; - struct trans_rpage_info *rpinfo; - - *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); - if (*pdata_off) - first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), - req->tc->pbuf_size); +int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write) +{ + int nr_mapped_pages; - rpinfo = req->tc->private; - pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, - nr_pages, rw, &rpinfo->rp_data[0]); - if (pdata_mapped_pages <= 0) - return pdata_mapped_pages; + nr_mapped_pages = get_user_pages_fast((unsigned long)data, + *nr_pages, write, pages); + if (nr_mapped_pages <= 0) + return nr_mapped_pages; - rpinfo->rp_nr_pages = pdata_mapped_pages; - if (*pdata_off) { - *pdata_len = first_page_bytes; - *pdata_len += min((req->tc->pbuf_size - *pdata_len), - ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); - } else { - *pdata_len = min(req->tc->pbuf_size, - (size_t)pdata_mapped_pages << PAGE_SHIFT); - } + *nr_pages = nr_mapped_pages; return 0; } EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h index 76309223bb02..173bb550a9eb 100644 --- a/net/9p/trans_common.h +++ b/net/9p/trans_common.h @@ -12,21 +12,6 @@ * */ -/* TRUE if it is user context */ -#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) - -/** - * struct trans_rpage_info - To store mapped page information in PDU. - * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. - * @rp_nr_pages: Number of mapped pages - * @rp_data: Array of page pointers - */ -struct trans_rpage_info { - u8 rp_alloc; - int rp_nr_pages; - struct page *rp_data[0]; -}; - -void p9_release_req_pages(struct trans_rpage_info *); -int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); -int p9_nr_pages(struct p9_req_t *); +void p9_release_pages(struct page **, int); +int p9_payload_gup(char *, int *, struct page **, int); +int p9_nr_pages(char *, int); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index fdfdb5747f63..fccae26fa674 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -25,6 +25,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> @@ -191,7 +193,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) unsigned long flags; LIST_HEAD(cancel_list); - P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); + p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); spin_lock_irqsave(&m->client->lock, flags); @@ -217,7 +219,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) spin_unlock_irqrestore(&m->client->lock, flags); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); p9_client_cb(m->client, req); } @@ -275,7 +277,7 @@ static int p9_fd_read(struct p9_client *client, void *v, int len) return -EREMOTEIO; if (!(ts->rd->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); + p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) @@ -299,7 +301,7 @@ static void p9_read_work(struct work_struct *work) if (m->err < 0) return; - P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); + p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); if (!m->rbuf) { m->rbuf = m->tmp_buf; @@ -308,11 +310,11 @@ static void p9_read_work(struct work_struct *work) } clear_bit(Rpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m, - m->rpos, m->rsize, m->rsize-m->rpos); + p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", + m, m->rpos, m->rsize, m->rsize-m->rpos); err = p9_fd_read(m->client, m->rbuf + m->rpos, m->rsize - m->rpos); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); + p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) { clear_bit(Rworksched, &m->wsched); return; @@ -325,25 +327,25 @@ static void p9_read_work(struct work_struct *work) if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */ u16 tag; - P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n"); + p9_debug(P9_DEBUG_TRANS, "got new header\n"); n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */ if (n >= m->client->msize) { - P9_DPRINTK(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", n); + p9_debug(P9_DEBUG_ERROR, + "requested packet size too big: %d\n", n); err = -EIO; goto error; } tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */ - P9_DPRINTK(P9_DEBUG_TRANS, - "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); + p9_debug(P9_DEBUG_TRANS, + "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); m->req = p9_tag_lookup(m->client, tag); if (!m->req || (m->req->status != REQ_STATUS_SENT && m->req->status != REQ_STATUS_FLSH)) { - P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", - tag); + p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", + tag); err = -EIO; goto error; } @@ -364,7 +366,7 @@ static void p9_read_work(struct work_struct *work) /* not an else because some packets (like clunk) have no payload */ if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ - P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); + p9_debug(P9_DEBUG_TRANS, "got new packet\n"); spin_lock(&m->client->lock); if (m->req->status != REQ_STATUS_ERROR) m->req->status = REQ_STATUS_RCVD; @@ -384,7 +386,7 @@ static void p9_read_work(struct work_struct *work) n = p9_fd_poll(m->client, NULL); if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } else clear_bit(Rworksched, &m->wsched); @@ -418,7 +420,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len) return -EREMOTEIO; if (!(ts->wr->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); + p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); oldfs = get_fs(); set_fs(get_ds()); @@ -460,7 +462,7 @@ static void p9_write_work(struct work_struct *work) req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; - P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req); + p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc->sdata; @@ -469,11 +471,11 @@ static void p9_write_work(struct work_struct *work) spin_unlock(&m->client->lock); } - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, - m->wsize); + p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", + m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); + p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); if (err == -EAGAIN) { clear_bit(Wworksched, &m->wsched); return; @@ -497,7 +499,7 @@ static void p9_write_work(struct work_struct *work) n = p9_fd_poll(m->client, NULL); if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); + p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } else clear_bit(Wworksched, &m->wsched); @@ -551,7 +553,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) } if (!pwait) { - P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); + p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } @@ -573,8 +575,7 @@ static struct p9_conn *p9_conn_create(struct p9_client *client) int n; struct p9_conn *m; - P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client, - client->msize); + p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); @@ -591,12 +592,12 @@ static struct p9_conn *p9_conn_create(struct p9_client *client) n = p9_fd_poll(client, &m->pt); if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); set_bit(Rpending, &m->wsched); } if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } @@ -618,7 +619,7 @@ static void p9_poll_mux(struct p9_conn *m) n = p9_fd_poll(m->client, NULL); if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { - P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); + p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); if (n >= 0) n = -ECONNRESET; p9_conn_cancel(m, n); @@ -626,19 +627,19 @@ static void p9_poll_mux(struct p9_conn *m) if (n & POLLIN) { set_bit(Rpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } if (n & POLLOUT) { set_bit(Wpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); + p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } @@ -661,8 +662,8 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) struct p9_trans_fd *ts = client->trans; struct p9_conn *m = ts->conn; - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, - current, req->tc, req->tc->id); + p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", + m, current, req->tc, req->tc->id); if (m->err < 0) return m->err; @@ -686,7 +687,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { int ret = 1; - P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); @@ -726,8 +727,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; @@ -741,8 +742,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (token != Opt_err) { r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); continue; } } @@ -801,7 +802,8 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) csocket->sk->sk_allocation = GFP_NOIO; fd = sock_map_fd(csocket, 0); if (fd < 0) { - P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); + pr_err("%s (%d): failed to map fd\n", + __func__, task_pid_nr(current)); sock_release(csocket); kfree(p); return fd; @@ -837,8 +839,8 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) static void p9_conn_destroy(struct p9_conn *m) { - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, - m->mux_list.prev, m->mux_list.next); + p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", + m, m->mux_list.prev, m->mux_list.next); p9_mux_poll_stop(m); cancel_work_sync(&m->rq); @@ -919,7 +921,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { - P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); + pr_err("%s (%d): problem creating socket\n", + __func__, task_pid_nr(current)); return err; } @@ -927,9 +930,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_tcp: problem connecting socket to %s\n", - addr); + pr_err("%s (%d): problem connecting socket to %s\n", + __func__, task_pid_nr(current), addr); sock_release(csocket); return err; } @@ -947,8 +949,8 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; if (strlen(addr) >= UNIX_PATH_MAX) { - P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", - addr); + pr_err("%s (%d): address too long: %s\n", + __func__, task_pid_nr(current), addr); return -ENAMETOOLONG; } @@ -957,15 +959,16 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { - P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n"); + pr_err("%s (%d): problem creating socket\n", + __func__, task_pid_nr(current)); + return err; } err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_unix: problem connecting socket: %s: %d\n", - addr, err); + pr_err("%s (%d): problem connecting socket: %s: %d\n", + __func__, task_pid_nr(current), addr, err); sock_release(csocket); return err; } @@ -983,7 +986,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) parse_opts(args, &opts); if (opts.rfd == ~0 || opts.wfd == ~0) { - printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); + pr_err("Insufficient options for proto=fd\n"); return -ENOPROTOOPT; } @@ -1050,7 +1053,7 @@ static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; - P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); + p9_debug(P9_DEBUG_TRANS, "start %p\n", current); spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { @@ -1066,7 +1069,7 @@ static void p9_poll_workfn(struct work_struct *work) } spin_unlock_irqrestore(&p9_poll_lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); + p9_debug(P9_DEBUG_TRANS, "finish\n"); } int p9_trans_fd_init(void) diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 159c50f1c6bf..2c69ddd691a1 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> @@ -178,8 +180,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; @@ -192,8 +194,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) token = match_token(p, tokens, args); r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); continue; } switch (token) { @@ -301,8 +303,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, return; err_out: - P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n", - req, err, status); + p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); rdma->state = P9_RDMA_FLUSHING; client->status = Disconnected; } @@ -318,8 +319,8 @@ handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, static void qp_event_handler(struct ib_event *event, void *context) { - P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event, - context); + p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n", + event->event, context); } static void cq_comp_handler(struct ib_cq *cq, void *cq_context) @@ -345,8 +346,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) break; default: - printk(KERN_ERR "9prdma: unexpected completion type, " - "c->wc_op=%d, wc.opcode=%d, status=%d\n", + pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n", c->wc_op, wc.opcode, wc.status); break; } @@ -356,7 +356,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) static void cq_event_handler(struct ib_event *e, void *v) { - P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); + p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); } static void rdma_destroy_trans(struct p9_trans_rdma *rdma) @@ -407,7 +407,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) return ib_post_recv(rdma->qp, &wr, &bad_wr); error: - P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + p9_debug(P9_DEBUG_ERROR, "EIO\n"); return -EIO; } @@ -500,7 +500,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) kfree(c); kfree(rpl_context->rc); kfree(rpl_context); - P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + p9_debug(P9_DEBUG_ERROR, "EIO\n"); return -EIO; err_free1: kfree(rpl_context->rc); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e317583fcc73..3d432068f627 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> @@ -145,35 +147,22 @@ static void req_done(struct virtqueue *vq) struct p9_req_t *req; unsigned long flags; - P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); + p9_debug(P9_DEBUG_TRANS, ": request done\n"); while (1) { spin_lock_irqsave(&chan->lock, flags); rc = virtqueue_get_buf(chan->vq, &len); - if (rc == NULL) { spin_unlock_irqrestore(&chan->lock, flags); break; } - chan->ring_bufs_avail = 1; spin_unlock_irqrestore(&chan->lock, flags); /* Wakeup if anyone waiting for VirtIO ring space. */ wake_up(chan->vc_wq); - P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); - P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); + p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); + p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); - if (req->tc->private) { - struct trans_rpage_info *rp = req->tc->private; - int p = rp->rp_nr_pages; - /*Release pages */ - p9_release_req_pages(rp); - atomic_sub(p, &vp_pinned); - wake_up(&vp_wq); - if (rp->rp_alloc) - kfree(rp); - req->tc->private = NULL; - } req->status = REQ_STATUS_RCVD; p9_client_cb(chan->client, req); } @@ -193,9 +182,8 @@ static void req_done(struct virtqueue *vq) * */ -static int -pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, - int count) +static int pack_sg_list(struct scatterlist *sg, int start, + int limit, char *data, int count) { int s; int index = start; @@ -224,31 +212,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) * this takes a list of pages. * @sg: scatter/gather list to pack into * @start: which segment of the sg_list to start at - * @pdata_off: Offset into the first page * @**pdata: a list of pages to add into sg. + * @nr_pages: number of pages to pack into the scatter/gather list + * @data: data to pack into scatter/gather list * @count: amount of data to pack into the scatter/gather list */ static int -pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, - struct page **pdata, int count) +pack_sg_list_p(struct scatterlist *sg, int start, int limit, + struct page **pdata, int nr_pages, char *data, int count) { - int s; - int i = 0; + int i = 0, s; + int data_off; int index = start; - if (pdata_off) { - s = min((int)(PAGE_SIZE - pdata_off), count); - sg_set_page(&sg[index++], pdata[i++], s, pdata_off); - count -= s; - } - - while (count) { - BUG_ON(index > limit); - s = min((int)PAGE_SIZE, count); - sg_set_page(&sg[index++], pdata[i++], s, 0); + BUG_ON(nr_pages > (limit - start)); + /* + * if the first page doesn't start at + * page boundary find the offset + */ + data_off = offset_in_page(data); + while (nr_pages) { + s = rest_of_page(data); + if (s > count) + s = count; + sg_set_page(&sg[index++], pdata[i++], s, data_off); + data_off = 0; + data += s; count -= s; + nr_pages--; } - return index-start; + return index - start; } /** @@ -261,143 +254,212 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { - int in, out, inp, outp; - struct virtio_chan *chan = client->trans; + int err; + int in, out; unsigned long flags; - size_t pdata_off = 0; - struct trans_rpage_info *rpinfo = NULL; - int err, pdata_len = 0; + struct virtio_chan *chan = client->trans; - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); req->status = REQ_STATUS_SENT; +req_retry: + spin_lock_irqsave(&chan->lock, flags); + + /* Handle out VirtIO ring buffers */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); - if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { - int nr_pages = p9_nr_pages(req); - int rpinfo_size = sizeof(struct trans_rpage_info) + - sizeof(struct page *) * nr_pages; + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); - if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { - err = wait_event_interruptible(vp_wq, - atomic_read(&vp_pinned) < chan->p9_max_pages); + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + GFP_ATOMIC); + if (err < 0) { + if (err == -ENOSPC) { + chan->ring_bufs_avail = 0; + spin_unlock_irqrestore(&chan->lock, flags); + err = wait_event_interruptible(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) return err; - P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); - } - if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { - /* We can use sdata */ - req->tc->private = req->tc->sdata + req->tc->size; - rpinfo = (struct trans_rpage_info *)req->tc->private; - rpinfo->rp_alloc = 0; + p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); + goto req_retry; } else { - req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); - if (!req->tc->private) { - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " - "private kmalloc returned NULL"); - return -ENOMEM; - } - rpinfo = (struct trans_rpage_info *)req->tc->private; - rpinfo->rp_alloc = 1; + spin_unlock_irqrestore(&chan->lock, flags); + p9_debug(P9_DEBUG_TRANS, + "virtio rpc add_buf returned failure\n"); + return -EIO; } + } + virtqueue_kick(chan->vq); + spin_unlock_irqrestore(&chan->lock, flags); + + p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); + return 0; +} - err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, - req->tc->id == P9_TREAD ? 1 : 0); - if (err < 0) { - if (rpinfo->rp_alloc) - kfree(rpinfo); +static int p9_get_mapped_pages(struct virtio_chan *chan, + struct page **pages, char *data, + int nr_pages, int write, int kern_buf) +{ + int err; + if (!kern_buf) { + /* + * We allow only p9_max_pages pinned. We wait for the + * Other zc request to finish here + */ + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { + err = wait_event_interruptible(vp_wq, + (atomic_read(&vp_pinned) < chan->p9_max_pages)); + if (err == -ERESTARTSYS) + return err; + } + err = p9_payload_gup(data, &nr_pages, pages, write); + if (err < 0) return err; - } else { - atomic_add(rpinfo->rp_nr_pages, &vp_pinned); + atomic_add(nr_pages, &vp_pinned); + } else { + /* kernel buffer, no need to pin pages */ + int s, index = 0; + int count = nr_pages; + while (nr_pages) { + s = rest_of_page(data); + pages[index++] = virt_to_page(data); + data += s; + nr_pages--; } + nr_pages = count; } + return nr_pages; +} -req_retry_pinned: - spin_lock_irqsave(&chan->lock, flags); +/** + * p9_virtio_zc_request - issue a zero copy request + * @client: client instance issuing the request + * @req: request to be issued + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * + */ +static int +p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, + char *uidata, char *uodata, int inlen, + int outlen, int in_hdr_len, int kern_buf) +{ + int in, out, err; + unsigned long flags; + int in_nr_pages = 0, out_nr_pages = 0; + struct page **in_pages = NULL, **out_pages = NULL; + struct virtio_chan *chan = client->trans; - /* Handle out VirtIO ring buffers */ - out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, - req->tc->size); - - if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { - /* We have additional write payload buffer to take care */ - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { - outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, - pdata_off, rpinfo->rp_data, pdata_len); - } else { - char *pbuf; - if (req->tc->pubuf) - pbuf = (__force char *) req->tc->pubuf; - else - pbuf = req->tc->pkbuf; - outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, - req->tc->pbuf_size); + p9_debug(P9_DEBUG_TRANS, "virtio request\n"); + + if (uodata) { + out_nr_pages = p9_nr_pages(uodata, outlen); + out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, + GFP_NOFS); + if (!out_pages) { + err = -ENOMEM; + goto err_out; + } + out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, + out_nr_pages, 0, kern_buf); + if (out_nr_pages < 0) { + err = out_nr_pages; + kfree(out_pages); + out_pages = NULL; + goto err_out; } - out += outp; } - - /* Handle in VirtIO ring buffers */ - if (req->tc->pbuf_size && - ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { - /* - * Take care of additional Read payload. - * 11 is the read/write header = PDU Header(7) + IO Size (4). - * Arrange in such a way that server places header in the - * alloced memory and payload onto the user buffer. - */ - inp = pack_sg_list(chan->sg, out, - VIRTQUEUE_NUM, req->rc->sdata, 11); - /* - * Running executables in the filesystem may result in - * a read request with kernel buffer as opposed to user buffer. - */ - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { - in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, - pdata_off, rpinfo->rp_data, pdata_len); - } else { - char *pbuf; - if (req->tc->pubuf) - pbuf = (__force char *) req->tc->pubuf; - else - pbuf = req->tc->pkbuf; - - in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, - pbuf, req->tc->pbuf_size); + if (uidata) { + in_nr_pages = p9_nr_pages(uidata, inlen); + in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, + GFP_NOFS); + if (!in_pages) { + err = -ENOMEM; + goto err_out; + } + in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, + in_nr_pages, 1, kern_buf); + if (in_nr_pages < 0) { + err = in_nr_pages; + kfree(in_pages); + in_pages = NULL; + goto err_out; } - in += inp; - } else { - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, - req->rc->sdata, req->rc->capacity); } + req->status = REQ_STATUS_SENT; +req_retry_pinned: + spin_lock_irqsave(&chan->lock, flags); + /* out data */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + if (out_pages) + out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + out_pages, out_nr_pages, uodata, outlen); + /* + * Take care of in data + * For example TREAD have 11. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); + if (in_pages) + in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, + in_pages, in_nr_pages, uidata, inlen); + + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); err = wait_event_interruptible(*chan->vc_wq, - chan->ring_bufs_avail); + chan->ring_bufs_avail); if (err == -ERESTARTSYS) - return err; + goto err_out; - P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); + p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); goto req_retry_pinned; } else { spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: " - "virtio rpc add_buf returned failure"); - if (rpinfo && rpinfo->rp_alloc) - kfree(rpinfo); - return -EIO; + p9_debug(P9_DEBUG_TRANS, + "virtio rpc add_buf returned failure\n"); + err = -EIO; + goto err_out; } } - virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); - - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); - return 0; + p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + /* + * Non kernel buffers are pinned, unpin them + */ +err_out: + if (!kern_buf) { + if (in_pages) { + p9_release_pages(in_pages, in_nr_pages); + atomic_sub(in_nr_pages, &vp_pinned); + } + if (out_pages) { + p9_release_pages(out_pages, out_nr_pages); + atomic_sub(out_nr_pages, &vp_pinned); + } + /* wakeup anybody waiting for slots to pin pages */ + wake_up(&vp_wq); + } + kfree(in_pages); + kfree(out_pages); + return err; } static ssize_t p9_mount_tag_show(struct device *dev, @@ -431,7 +493,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); if (!chan) { - printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n"); + pr_err("Failed to allocate virtio 9P channel\n"); err = -ENOMEM; goto fail; } @@ -532,7 +594,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_unlock(&virtio_9p_lock); if (!found) { - printk(KERN_ERR "9p: no channels available\n"); + pr_err("no channels available\n"); return ret; } @@ -591,8 +653,8 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .close = p9_virtio_close, .request = p9_virtio_request, + .zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, - /* * We leave one entry for input and one entry for response * headers. We also skip one more entry to accomodate, address @@ -600,7 +662,6 @@ static struct p9_trans_module p9_virtio_trans = { * page in zero copy. */ .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), - .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, }; diff --git a/net/9p/util.c b/net/9p/util.c index 9c1c9348ac35..6ceeeb384de7 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -106,7 +106,7 @@ retry: else if (error) return -1; - P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p); + p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p); return i; } EXPORT_SYMBOL(p9_idpool_get); @@ -124,7 +124,7 @@ void p9_idpool_put(int id, struct p9_idpool *p) { unsigned long flags; - P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p); + p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p); spin_lock_irqsave(&p->lock, flags); idr_remove(&p->pool, id); diff --git a/net/Kconfig b/net/Kconfig index a07314844238..e07272d0bb2d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -215,6 +215,7 @@ source "net/sched/Kconfig" source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" +source "net/openvswitch/Kconfig" config RPS boolean @@ -232,6 +233,19 @@ config XPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config NETPRIO_CGROUP + tristate "Network priority cgroup" + depends on CGROUPS + ---help--- + Cgroup subsystem for use in assigning processes to network priorities on + a per-interface basis + +config BQL + boolean + depends on SYSFS + select DQL + default y + config HAVE_BPF_JIT bool diff --git a/net/Makefile b/net/Makefile index acdde4950de4..ad432fa4d934 100644 --- a/net/Makefile +++ b/net/Makefile @@ -69,3 +69,4 @@ obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ +obj-$(CONFIG_OPENVSWITCH) += openvswitch/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 1acc69576df8..173a2e82f486 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -39,6 +39,7 @@ #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/export.h> int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME; int sysctl_aarp_tick_time = AARP_TICK_TIME; diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 6ef0e761e5de..b5b1a221c242 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -14,6 +14,7 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <linux/atalk.h> +#include <linux/export.h> static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b1fe7c35e8d1..bfa9ab93eda5 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -951,13 +951,12 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, /* checksum stuff in frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (copy > len) copy = len; diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index f41f02656ff4..876fbe83e2e4 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -26,7 +26,7 @@ struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc, int pdu_size, gfp_t gfp_flags) { struct sock *sk = sk_atm(vcc); - int guess = atm_guess_pdu2truesize(pdu_size); + int guess = SKB_TRUESIZE(pdu_size); atm_force_charge(vcc, guess); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { diff --git a/net/atm/br2684.c b/net/atm/br2684.c index d07223c834af..353fccf1cde3 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -53,6 +53,7 @@ static const unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 }; static const unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 }; static const unsigned char llc_oui_pid_pad[] = { LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED }; +static const unsigned char pad[] = { PAD_BRIDGED }; static const unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 }; static const unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 }; @@ -202,7 +203,10 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, { struct br2684_dev *brdev = BRPRIV(dev); struct atm_vcc *atmvcc; - int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2; + int minheadroom = (brvcc->encaps == e_llc) ? + ((brdev->payload == p_bridged) ? + sizeof(llc_oui_pid_pad) : sizeof(llc_oui_ipv4)) : + ((brdev->payload == p_bridged) ? BR2684_PAD_LEN : 0); if (skb_headroom(skb) < minheadroom) { struct sk_buff *skb2 = skb_realloc_headroom(skb, minheadroom); @@ -450,7 +454,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) skb->pkt_type = PACKET_HOST; } else { /* p_bridged */ /* first 2 chars should be 0 */ - if (*((u16 *) (skb->data)) != 0) + if (memcmp(skb->data, pad, BR2684_PAD_LEN) != 0) goto error; skb_pull(skb, BR2684_PAD_LEN); skb->protocol = eth_type_trans(skb, net_dev); @@ -489,15 +493,11 @@ free_skb: */ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) { - struct sk_buff_head queue; - int err; struct br2684_vcc *brvcc; - struct sk_buff *skb, *tmp; - struct sk_buff_head *rq; struct br2684_dev *brdev; struct net_device *net_dev; struct atm_backend_br2684 be; - unsigned long flags; + int err; if (copy_from_user(&be, arg, sizeof be)) return -EFAULT; @@ -550,23 +550,6 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) atmvcc->push = br2684_push; atmvcc->pop = br2684_pop; - __skb_queue_head_init(&queue); - rq = &sk_atm(atmvcc)->sk_receive_queue; - - spin_lock_irqsave(&rq->lock, flags); - skb_queue_splice_init(rq, &queue); - spin_unlock_irqrestore(&rq->lock, flags); - - skb_queue_walk_safe(&queue, skb, tmp) { - struct net_device *dev; - - br2684_push(atmvcc, skb); - dev = skb->dev; - - dev->stats.rx_bytes -= skb->len; - dev->stats.rx_packets--; - } - /* initialize netdev carrier state */ if (atmvcc->dev->signal == ATM_PHY_SIG_LOST) netif_carrier_off(net_dev); @@ -574,6 +557,10 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) netif_carrier_on(net_dev); __module_get(THIS_MODULE); + + /* re-process everything received between connection setup and + backend setup */ + vcc_process_recv_queue(atmvcc); return 0; error: @@ -600,6 +587,7 @@ static void br2684_setup(struct net_device *netdev) struct br2684_dev *brdev = BRPRIV(netdev); ether_setup(netdev); + netdev->hard_header_len += sizeof(llc_oui_pid_pad); /* worst case */ brdev->net_dev = netdev; netdev->netdev_ops = &br2684_netdev_ops; @@ -612,7 +600,7 @@ static void br2684_setup_routed(struct net_device *netdev) struct br2684_dev *brdev = BRPRIV(netdev); brdev->net_dev = netdev; - netdev->hard_header_len = 0; + netdev->hard_header_len = sizeof(llc_oui_ipv4); /* worst case */ netdev->netdev_ops = &br2684_netdev_ops_routed; netdev->addr_len = 0; netdev->mtu = 1500; diff --git a/net/atm/clip.c b/net/atm/clip.c index 852394072fa1..c12c2582457c 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <net/route.h> /* for struct rtable and routing */ #include <net/icmp.h> /* icmp_send */ +#include <net/arp.h> #include <linux/param.h> /* for HZ */ #include <linux/uaccess.h> #include <asm/byteorder.h> /* for htons etc. */ @@ -119,7 +120,7 @@ out: /* The neighbour entry n->lock is held. */ static int neigh_check_cb(struct neighbour *n) { - struct atmarp_entry *entry = NEIGH2ENTRY(n); + struct atmarp_entry *entry = neighbour_priv(n); struct clip_vcc *cv; for (cv = entry->vccs; cv; cv = cv->next) { @@ -189,6 +190,13 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) struct clip_vcc *clip_vcc = CLIP_VCC(vcc); pr_debug("\n"); + + if (!clip_devs) { + atm_return(vcc, skb->truesize); + kfree_skb(skb); + return; + } + if (!skb) { pr_debug("removing VCC %p\n", clip_vcc); if (clip_vcc->entry) @@ -255,8 +263,10 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb) static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb) { + __be32 *ip = (__be32 *) neigh->primary_key; + pr_debug("(neigh %p, skb %p)\n", neigh, skb); - to_atmarpd(act_need, PRIV(neigh->dev)->number, NEIGH2ENTRY(neigh)->ip); + to_atmarpd(act_need, PRIV(neigh->dev)->number, *ip); } static void clip_neigh_error(struct neighbour *neigh, struct sk_buff *skb) @@ -277,72 +287,24 @@ static const struct neigh_ops clip_neigh_ops = { static int clip_constructor(struct neighbour *neigh) { - struct atmarp_entry *entry = NEIGH2ENTRY(neigh); - struct net_device *dev = neigh->dev; - struct in_device *in_dev; - struct neigh_parms *parms; + struct atmarp_entry *entry = neighbour_priv(neigh); - pr_debug("(neigh %p, entry %p)\n", neigh, entry); - neigh->type = inet_addr_type(&init_net, entry->ip); - if (neigh->type != RTN_UNICAST) + if (neigh->tbl->family != AF_INET) return -EINVAL; - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (!in_dev) { - rcu_read_unlock(); + if (neigh->type != RTN_UNICAST) return -EINVAL; - } - - parms = in_dev->arp_parms; - __neigh_parms_put(neigh->parms); - neigh->parms = neigh_parms_clone(parms); - rcu_read_unlock(); + neigh->nud_state = NUD_NONE; neigh->ops = &clip_neigh_ops; - neigh->output = neigh->nud_state & NUD_VALID ? - neigh->ops->connected_output : neigh->ops->output; + neigh->output = neigh->ops->output; entry->neigh = neigh; entry->vccs = NULL; entry->expires = jiffies - 1; + return 0; } -static u32 clip_hash(const void *pkey, const struct net_device *dev, __u32 rnd) -{ - return jhash_2words(*(u32 *) pkey, dev->ifindex, rnd); -} - -static struct neigh_table clip_tbl = { - .family = AF_INET, - .entry_size = sizeof(struct neighbour)+sizeof(struct atmarp_entry), - .key_len = 4, - .hash = clip_hash, - .constructor = clip_constructor, - .id = "clip_arp_cache", - - /* parameters are copied from ARP ... */ - .parms = { - .tbl = &clip_tbl, - .base_reachable_time = 30 * HZ, - .retrans_time = 1 * HZ, - .gc_staletime = 60 * HZ, - .reachable_time = 30 * HZ, - .delay_probe_time = 5 * HZ, - .queue_len = 3, - .ucast_probes = 3, - .mcast_probes = 3, - .anycast_delay = 1 * HZ, - .proxy_delay = (8 * HZ) / 10, - .proxy_qlen = 64, - .locktime = 1 * HZ, - }, - .gc_interval = 30 * HZ, - .gc_thresh1 = 128, - .gc_thresh2 = 512, - .gc_thresh3 = 1024, -}; - /* @@@ copy bh locking from arp.c -- need to bh-enable atm code before */ /* @@ -376,28 +338,19 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, dev->stats.tx_dropped++; return NETDEV_TX_OK; } - n = dst_get_neighbour(dst); + n = dst_get_neighbour_noref(dst); if (!n) { -#if 0 - n = clip_find_neighbour(skb_dst(skb), 1); - if (!n) { - dev_kfree_skb(skb); /* lost that one */ - dev->stats.tx_dropped++; - return 0; - } - dst_set_neighbour(dst, n); -#endif pr_err("NO NEIGHBOUR !\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } - entry = NEIGH2ENTRY(n); + entry = neighbour_priv(n); if (!entry->vccs) { if (time_after(jiffies, entry->expires)) { /* should be resolved */ entry->expires = jiffies + ATMARP_RETRY_DELAY * HZ; - to_atmarpd(act_need, PRIV(dev)->number, entry->ip); + to_atmarpd(act_need, PRIV(dev)->number, *((__be32 *)n->primary_key)); } if (entry->neigh->arp_queue.qlen < ATMARP_MAX_UNRES_PACKETS) skb_queue_tail(&entry->neigh->arp_queue, skb); @@ -448,10 +401,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, static int clip_mkip(struct atm_vcc *vcc, int timeout) { - struct sk_buff_head *rq, queue; struct clip_vcc *clip_vcc; - struct sk_buff *skb, *tmp; - unsigned long flags; if (!vcc->push) return -EBADFD; @@ -472,29 +422,9 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) vcc->push = clip_push; vcc->pop = clip_pop; - __skb_queue_head_init(&queue); - rq = &sk_atm(vcc)->sk_receive_queue; - - spin_lock_irqsave(&rq->lock, flags); - skb_queue_splice_init(rq, &queue); - spin_unlock_irqrestore(&rq->lock, flags); - /* re-process everything received between connection setup and MKIP */ - skb_queue_walk_safe(&queue, skb, tmp) { - if (!clip_devs) { - atm_return(vcc, skb->truesize); - kfree_skb(skb); - } else { - struct net_device *dev = skb->dev; - unsigned int len = skb->len; - - skb_get(skb); - clip_push(vcc, skb); - dev->stats.rx_packets--; - dev->stats.rx_bytes -= len; - kfree_skb(skb); - } - } + vcc_process_recv_queue(vcc); + return 0; } @@ -523,11 +453,11 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) rt = ip_route_output(&init_net, ip, 0, 1, 0); if (IS_ERR(rt)) return PTR_ERR(rt); - neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); + neigh = __neigh_lookup(&arp_tbl, &ip, rt->dst.dev, 1); ip_rt_put(rt); if (!neigh) return -ENOMEM; - entry = NEIGH2ENTRY(neigh); + entry = neighbour_priv(neigh); if (entry != clip_vcc->entry) { if (!clip_vcc->entry) pr_debug("add\n"); @@ -544,13 +474,15 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) } static const struct net_device_ops clip_netdev_ops = { - .ndo_start_xmit = clip_start_xmit, + .ndo_start_xmit = clip_start_xmit, + .ndo_neigh_construct = clip_constructor, }; static void clip_setup(struct net_device *dev) { dev->netdev_ops = &clip_netdev_ops; dev->type = ARPHRD_ATM; + dev->neigh_priv_len = sizeof(struct atmarp_entry); dev->hard_header_len = RFC1483LLC_LEN; dev->mtu = RFC1626_MTU; dev->tx_queue_len = 100; /* "normal" queue (packets) */ @@ -604,10 +536,8 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - if (event == NETDEV_UNREGISTER) { - neigh_ifdown(&clip_tbl, dev); + if (event == NETDEV_UNREGISTER) return NOTIFY_DONE; - } /* ignore non-CLIP devices */ if (dev->type != ARPHRD_ATM || dev->netdev_ops != &clip_netdev_ops) @@ -787,9 +717,10 @@ static void svc_addr(struct seq_file *seq, struct sockaddr_atmsvc *addr) /* This means the neighbour entry has no attached VCC objects. */ #define SEQ_NO_VCC_TOKEN ((void *) 2) -static void atmarp_info(struct seq_file *seq, struct net_device *dev, +static void atmarp_info(struct seq_file *seq, struct neighbour *n, struct atmarp_entry *entry, struct clip_vcc *clip_vcc) { + struct net_device *dev = n->dev; unsigned long exp; char buf[17]; int svc, llc, off; @@ -809,8 +740,7 @@ static void atmarp_info(struct seq_file *seq, struct net_device *dev, seq_printf(seq, "%-6s%-4s%-4s%5ld ", dev->name, svc ? "SVC" : "PVC", llc ? "LLC" : "NULL", exp); - off = scnprintf(buf, sizeof(buf) - 1, "%pI4", - &entry->ip); + off = scnprintf(buf, sizeof(buf) - 1, "%pI4", n->primary_key); while (off < 16) buf[off++] = ' '; buf[off] = '\0'; @@ -881,14 +811,17 @@ static void *clip_seq_sub_iter(struct neigh_seq_state *_state, { struct clip_seq_state *state = (struct clip_seq_state *)_state; - return clip_seq_vcc_walk(state, NEIGH2ENTRY(n), pos); + if (n->dev->type != ARPHRD_ATM) + return NULL; + + return clip_seq_vcc_walk(state, neighbour_priv(n), pos); } static void *clip_seq_start(struct seq_file *seq, loff_t * pos) { struct clip_seq_state *state = seq->private; state->ns.neigh_sub_iter = clip_seq_sub_iter; - return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY); + return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_NEIGH_ONLY); } static int clip_seq_show(struct seq_file *seq, void *v) @@ -900,10 +833,10 @@ static int clip_seq_show(struct seq_file *seq, void *v) seq_puts(seq, atm_arp_banner); } else { struct clip_seq_state *state = seq->private; - struct neighbour *n = v; struct clip_vcc *vcc = state->vcc; + struct neighbour *n = v; - atmarp_info(seq, n->dev, NEIGH2ENTRY(n), vcc); + atmarp_info(seq, n, neighbour_priv(n), vcc); } return 0; } @@ -934,9 +867,6 @@ static void atm_clip_exit_noproc(void); static int __init atm_clip_init(void) { - neigh_table_init_no_netlink(&clip_tbl); - - clip_tbl_hook = &clip_tbl; register_atm_ioctl(&clip_ioctl_ops); register_netdevice_notifier(&clip_dev_notifier); register_inetaddr_notifier(&clip_inet_notifier); @@ -973,12 +903,6 @@ static void atm_clip_exit_noproc(void) */ del_timer_sync(&idle_timer); - /* Next, purge the table, so that the device - * unregister loop below does not hang due to - * device references remaining in the table. - */ - neigh_ifdown(&clip_tbl, NULL); - dev = clip_devs; while (dev) { next = PRIV(dev)->next; @@ -986,11 +910,6 @@ static void atm_clip_exit_noproc(void) free_netdev(dev); dev = next; } - - /* Now it is safe to fully shutdown whole table. */ - neigh_table_clear(&clip_tbl); - - clip_tbl_hook = NULL; } static void __exit atm_clip_exit(void) diff --git a/net/atm/common.c b/net/atm/common.c index 14ff9fe39989..b4b44dbed645 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -214,6 +214,26 @@ void vcc_release_async(struct atm_vcc *vcc, int reply) } EXPORT_SYMBOL(vcc_release_async); +void vcc_process_recv_queue(struct atm_vcc *vcc) +{ + struct sk_buff_head queue, *rq; + struct sk_buff *skb, *tmp; + unsigned long flags; + + __skb_queue_head_init(&queue); + rq = &sk_atm(vcc)->sk_receive_queue; + + spin_lock_irqsave(&rq->lock, flags); + skb_queue_splice_init(rq, &queue); + spin_unlock_irqrestore(&rq->lock, flags); + + skb_queue_walk_safe(&queue, skb, tmp) { + __skb_unlink(skb, &queue); + vcc->push(vcc, skb); + } +} +EXPORT_SYMBOL(vcc_process_recv_queue); + void atm_dev_signal_change(struct atm_dev *dev, char signal) { pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n", @@ -502,8 +522,11 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (sock->state != SS_CONNECTED) return -ENOTCONN; - if (flags & ~MSG_DONTWAIT) /* only handle MSG_DONTWAIT */ + + /* only handle MSG_DONTWAIT and MSG_PEEK */ + if (flags & ~(MSG_DONTWAIT | MSG_PEEK)) return -EOPNOTSUPP; + vcc = ATM_SD(sock); if (test_bit(ATM_VF_RELEASED, &vcc->flags) || test_bit(ATM_VF_CLOSE, &vcc->flags) || @@ -524,8 +547,13 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (error) return error; sock_recv_ts_and_drops(msg, sk, skb); - pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize); - atm_return(vcc, skb->truesize); + + if (!(flags & MSG_PEEK)) { + pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc), + skb->truesize); + atm_return(vcc, skb->truesize); + } + skb_free_datagram(sk, skb); return copied; } diff --git a/net/atm/common.h b/net/atm/common.h index f48a76b6cdf4..cc3c2dae4d79 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -24,6 +24,7 @@ int vcc_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); int vcc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); +void vcc_process_recv_queue(struct atm_vcc *vcc); int atmpvc_init(void); void atmpvc_exit(void); diff --git a/net/atm/lec.c b/net/atm/lec.c index 215c9fad7cdf..f1964caa0f83 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -643,7 +643,7 @@ static const struct net_device_ops lec_netdev_ops = { .ndo_start_xmit = lec_start_xmit, .ndo_change_mtu = lec_change_mtu, .ndo_tx_timeout = lec_tx_timeout, - .ndo_set_multicast_list = lec_set_multicast_list, + .ndo_set_rx_mode = lec_set_multicast_list, }; static const unsigned char lec_ctrl_magic[] = { diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index db4a11c61d15..df35d9a3b5fe 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -303,6 +303,10 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg) atmvcc->push = pppoatm_push; atmvcc->pop = pppoatm_pop; __module_get(THIS_MODULE); + + /* re-process everything received between connection setup and + backend setup */ + vcc_process_recv_queue(atmvcc); return 0; } diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 437ee70c5e62..3a734919c36c 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/skbuff.h> #include <linux/bitops.h> +#include <linux/export.h> #include <net/sock.h> /* for sock_no_* */ #include "resources.h" /* devs and vccs */ diff --git a/net/atm/svc.c b/net/atm/svc.c index 754ee4791d96..1281049c135f 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -20,6 +20,7 @@ #include <linux/bitops.h> #include <net/sock.h> /* for sock_no_* */ #include <linux/uaccess.h> +#include <linux/export.h> #include "resources.h" #include "common.h" /* common for PVCs and SVCs */ diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index e7c69f4619ec..3cd0a0dc91cb 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -402,14 +402,14 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) break; case AX25_T1: - if (ax25_ctl.arg < 1) + if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ) goto einval_put; ax25->rtt = (ax25_ctl.arg * HZ) / 2; ax25->t1 = ax25_ctl.arg * HZ; break; case AX25_T2: - if (ax25_ctl.arg < 1) + if (ax25_ctl.arg < 1 || ax25_ctl.arg > ULONG_MAX / HZ) goto einval_put; ax25->t2 = ax25_ctl.arg * HZ; break; @@ -422,10 +422,15 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) break; case AX25_T3: + if (ax25_ctl.arg > ULONG_MAX / HZ) + goto einval_put; ax25->t3 = ax25_ctl.arg * HZ; break; case AX25_IDLE: + if (ax25_ctl.arg > ULONG_MAX / (60 * HZ)) + goto einval_put; + ax25->idle = ax25_ctl.arg * 60 * HZ; break; @@ -540,15 +545,16 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, ax25_cb *ax25; struct net_device *dev; char devname[IFNAMSIZ]; - int opt, res = 0; + unsigned long opt; + int res = 0; if (level != SOL_AX25) return -ENOPROTOOPT; - if (optlen < sizeof(int)) + if (optlen < sizeof(unsigned int)) return -EINVAL; - if (get_user(opt, (int __user *)optval)) + if (get_user(opt, (unsigned int __user *)optval)) return -EFAULT; lock_sock(sk); @@ -571,7 +577,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T1: - if (opt < 1) { + if (opt < 1 || opt > ULONG_MAX / HZ) { res = -EINVAL; break; } @@ -580,7 +586,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T2: - if (opt < 1) { + if (opt < 1 || opt > ULONG_MAX / HZ) { res = -EINVAL; break; } @@ -596,7 +602,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T3: - if (opt < 1) { + if (opt < 1 || opt > ULONG_MAX / HZ) { res = -EINVAL; break; } @@ -604,7 +610,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_IDLE: - if (opt < 0) { + if (opt > ULONG_MAX / (60 * HZ)) { res = -EINVAL; break; } @@ -612,7 +618,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_BACKOFF: - if (opt < 0 || opt > 2) { + if (opt > 2) { res = -EINVAL; break; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index a1690845dc6e..87fddab22e0f 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -38,6 +38,7 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/seq_file.h> +#include <linux/export.h> static ax25_route *ax25_route_list; static DEFINE_RWLOCK(ax25_route_lock); diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index d349be9578f5..4c83137b5954 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -37,6 +37,7 @@ #include <linux/stat.h> #include <linux/netfilter.h> #include <linux/sysctl.h> +#include <linux/export.h> #include <net/ip.h> #include <net/arp.h> diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 2de93d00631b..ce6861166499 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -19,8 +19,8 @@ # obj-$(CONFIG_BATMAN_ADV) += batman-adv.o -batman-adv-y += aggregation.o batman-adv-y += bat_debugfs.o +batman-adv-y += bat_iv_ogm.o batman-adv-y += bat_sysfs.o batman-adv-y += bitarray.o batman-adv-y += gateway_client.o diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c deleted file mode 100644 index 69467fe71ff2..000000000000 --- a/net/batman-adv/aggregation.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: - * - * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - * - */ - -#include "main.h" -#include "translation-table.h" -#include "aggregation.h" -#include "send.h" -#include "routing.h" -#include "hard-interface.h" - -/* return true if new_packet can be aggregated with forw_packet */ -static bool can_aggregate_with(const struct batman_packet *new_batman_packet, - struct bat_priv *bat_priv, - int packet_len, - unsigned long send_time, - bool directlink, - const struct hard_iface *if_incoming, - const struct forw_packet *forw_packet) -{ - struct batman_packet *batman_packet = - (struct batman_packet *)forw_packet->skb->data; - int aggregated_bytes = forw_packet->packet_len + packet_len; - struct hard_iface *primary_if = NULL; - bool res = false; - - /** - * we can aggregate the current packet to this aggregated packet - * if: - * - * - the send time is within our MAX_AGGREGATION_MS time - * - the resulting packet wont be bigger than - * MAX_AGGREGATION_BYTES - */ - - if (time_before(send_time, forw_packet->send_time) && - time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS), - forw_packet->send_time) && - (aggregated_bytes <= MAX_AGGREGATION_BYTES)) { - - /** - * check aggregation compatibility - * -> direct link packets are broadcasted on - * their interface only - * -> aggregate packet if the current packet is - * a "global" packet as well as the base - * packet - */ - - primary_if = primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - /* packets without direct link flag and high TTL - * are flooded through the net */ - if ((!directlink) && - (!(batman_packet->flags & DIRECTLINK)) && - (batman_packet->ttl != 1) && - - /* own packets originating non-primary - * interfaces leave only that interface */ - ((!forw_packet->own) || - (forw_packet->if_incoming == primary_if))) { - res = true; - goto out; - } - - /* if the incoming packet is sent via this one - * interface only - we still can aggregate */ - if ((directlink) && - (new_batman_packet->ttl == 1) && - (forw_packet->if_incoming == if_incoming) && - - /* packets from direct neighbors or - * own secondary interface packets - * (= secondary interface packets in general) */ - (batman_packet->flags & DIRECTLINK || - (forw_packet->own && - forw_packet->if_incoming != primary_if))) { - res = true; - goto out; - } - } - -out: - if (primary_if) - hardif_free_ref(primary_if); - return res; -} - -/* create a new aggregated packet and add this packet to it */ -static void new_aggregated_packet(const unsigned char *packet_buff, - int packet_len, unsigned long send_time, - bool direct_link, - struct hard_iface *if_incoming, - int own_packet) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct forw_packet *forw_packet_aggr; - unsigned char *skb_buff; - - if (!atomic_inc_not_zero(&if_incoming->refcount)) - return; - - /* own packet should always be scheduled */ - if (!own_packet) { - if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) { - bat_dbg(DBG_BATMAN, bat_priv, - "batman packet queue full\n"); - goto out; - } - } - - forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); - if (!forw_packet_aggr) { - if (!own_packet) - atomic_inc(&bat_priv->batman_queue_left); - goto out; - } - - if ((atomic_read(&bat_priv->aggregated_ogms)) && - (packet_len < MAX_AGGREGATION_BYTES)) - forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES + - sizeof(struct ethhdr)); - else - forw_packet_aggr->skb = dev_alloc_skb(packet_len + - sizeof(struct ethhdr)); - - if (!forw_packet_aggr->skb) { - if (!own_packet) - atomic_inc(&bat_priv->batman_queue_left); - kfree(forw_packet_aggr); - goto out; - } - skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr)); - - INIT_HLIST_NODE(&forw_packet_aggr->list); - - skb_buff = skb_put(forw_packet_aggr->skb, packet_len); - forw_packet_aggr->packet_len = packet_len; - memcpy(skb_buff, packet_buff, packet_len); - - forw_packet_aggr->own = own_packet; - forw_packet_aggr->if_incoming = if_incoming; - forw_packet_aggr->num_packets = 0; - forw_packet_aggr->direct_link_flags = NO_FLAGS; - forw_packet_aggr->send_time = send_time; - - /* save packet direct link flag status */ - if (direct_link) - forw_packet_aggr->direct_link_flags |= 1; - - /* add new packet to packet list */ - spin_lock_bh(&bat_priv->forw_bat_list_lock); - hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list); - spin_unlock_bh(&bat_priv->forw_bat_list_lock); - - /* start timer for this packet */ - INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, - send_outstanding_bat_packet); - queue_delayed_work(bat_event_workqueue, - &forw_packet_aggr->delayed_work, - send_time - jiffies); - - return; -out: - hardif_free_ref(if_incoming); -} - -/* aggregate a new packet into the existing aggregation */ -static void aggregate(struct forw_packet *forw_packet_aggr, - const unsigned char *packet_buff, int packet_len, - bool direct_link) -{ - unsigned char *skb_buff; - - skb_buff = skb_put(forw_packet_aggr->skb, packet_len); - memcpy(skb_buff, packet_buff, packet_len); - forw_packet_aggr->packet_len += packet_len; - forw_packet_aggr->num_packets++; - - /* save packet direct link flag status */ - if (direct_link) - forw_packet_aggr->direct_link_flags |= - (1 << forw_packet_aggr->num_packets); -} - -void add_bat_packet_to_list(struct bat_priv *bat_priv, - unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming, int own_packet, - unsigned long send_time) -{ - /** - * _aggr -> pointer to the packet we want to aggregate with - * _pos -> pointer to the position in the queue - */ - struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL; - struct hlist_node *tmp_node; - struct batman_packet *batman_packet = - (struct batman_packet *)packet_buff; - bool direct_link = batman_packet->flags & DIRECTLINK ? 1 : 0; - - /* find position for the packet in the forward queue */ - spin_lock_bh(&bat_priv->forw_bat_list_lock); - /* own packets are not to be aggregated */ - if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) { - hlist_for_each_entry(forw_packet_pos, tmp_node, - &bat_priv->forw_bat_list, list) { - if (can_aggregate_with(batman_packet, - bat_priv, - packet_len, - send_time, - direct_link, - if_incoming, - forw_packet_pos)) { - forw_packet_aggr = forw_packet_pos; - break; - } - } - } - - /* nothing to aggregate with - either aggregation disabled or no - * suitable aggregation packet found */ - if (!forw_packet_aggr) { - /* the following section can run without the lock */ - spin_unlock_bh(&bat_priv->forw_bat_list_lock); - - /** - * if we could not aggregate this packet with one of the others - * we hold it back for a while, so that it might be aggregated - * later on - */ - if ((!own_packet) && - (atomic_read(&bat_priv->aggregated_ogms))) - send_time += msecs_to_jiffies(MAX_AGGREGATION_MS); - - new_aggregated_packet(packet_buff, packet_len, - send_time, direct_link, - if_incoming, own_packet); - } else { - aggregate(forw_packet_aggr, - packet_buff, packet_len, - direct_link); - spin_unlock_bh(&bat_priv->forw_bat_list_lock); - } -} - -/* unpack the aggregated packets and process them one by one */ -void receive_aggr_bat_packet(const struct ethhdr *ethhdr, - unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming) -{ - struct batman_packet *batman_packet; - int buff_pos = 0; - unsigned char *tt_buff; - - batman_packet = (struct batman_packet *)packet_buff; - - do { - /* network to host order for our 32bit seqno and the - orig_interval */ - batman_packet->seqno = ntohl(batman_packet->seqno); - batman_packet->tt_crc = ntohs(batman_packet->tt_crc); - - tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN; - - receive_bat_packet(ethhdr, batman_packet, tt_buff, if_incoming); - - buff_pos += BAT_PACKET_LEN + - tt_len(batman_packet->tt_num_changes); - - batman_packet = (struct batman_packet *) - (packet_buff + buff_pos); - } while (aggregated_packet(buff_pos, packet_len, - batman_packet->tt_num_changes)); -} diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h deleted file mode 100644 index 216337bb841f..000000000000 --- a/net/batman-adv/aggregation.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: - * - * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - * - */ - -#ifndef _NET_BATMAN_ADV_AGGREGATION_H_ -#define _NET_BATMAN_ADV_AGGREGATION_H_ - -#include "main.h" - -/* is there another aggregated packet here? */ -static inline int aggregated_packet(int buff_pos, int packet_len, - int tt_num_changes) -{ - int next_buff_pos = buff_pos + BAT_PACKET_LEN + (tt_num_changes * - sizeof(struct tt_change)); - - return (next_buff_pos <= packet_len) && - (next_buff_pos <= MAX_AGGREGATION_BYTES); -} - -void add_bat_packet_to_list(struct bat_priv *bat_priv, - unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming, int own_packet, - unsigned long send_time); -void receive_aggr_bat_packet(const struct ethhdr *ethhdr, - unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming); - -#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c new file mode 100644 index 000000000000..3512e251545b --- /dev/null +++ b/net/batman-adv/bat_iv_ogm.c @@ -0,0 +1,1170 @@ +/* + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "bat_ogm.h" +#include "translation-table.h" +#include "ring_buffer.h" +#include "originator.h" +#include "routing.h" +#include "gateway_common.h" +#include "gateway_client.h" +#include "hard-interface.h" +#include "send.h" + +void bat_ogm_init(struct hard_iface *hard_iface) +{ + struct batman_ogm_packet *batman_ogm_packet; + + hard_iface->packet_len = BATMAN_OGM_LEN; + hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); + + batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff; + batman_ogm_packet->packet_type = BAT_OGM; + batman_ogm_packet->version = COMPAT_VERSION; + batman_ogm_packet->flags = NO_FLAGS; + batman_ogm_packet->ttl = 2; + batman_ogm_packet->tq = TQ_MAX_VALUE; + batman_ogm_packet->tt_num_changes = 0; + batman_ogm_packet->ttvn = 0; +} + +void bat_ogm_init_primary(struct hard_iface *hard_iface) +{ + struct batman_ogm_packet *batman_ogm_packet; + + batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff; + batman_ogm_packet->flags = PRIMARIES_FIRST_HOP; + batman_ogm_packet->ttl = TTL; +} + +void bat_ogm_update_mac(struct hard_iface *hard_iface) +{ + struct batman_ogm_packet *batman_ogm_packet; + + batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff; + memcpy(batman_ogm_packet->orig, + hard_iface->net_dev->dev_addr, ETH_ALEN); + memcpy(batman_ogm_packet->prev_sender, + hard_iface->net_dev->dev_addr, ETH_ALEN); +} + +/* when do we schedule our own ogm to be sent */ +static unsigned long bat_ogm_emit_send_time(const struct bat_priv *bat_priv) +{ + return jiffies + msecs_to_jiffies( + atomic_read(&bat_priv->orig_interval) - + JITTER + (random32() % 2*JITTER)); +} + +/* when do we schedule a ogm packet to be sent */ +static unsigned long bat_ogm_fwd_send_time(void) +{ + return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); +} + +/* apply hop penalty for a normal link */ +static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv) +{ + int hop_penalty = atomic_read(&bat_priv->hop_penalty); + return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE); +} + +/* is there another aggregated packet here? */ +static int bat_ogm_aggr_packet(int buff_pos, int packet_len, + int tt_num_changes) +{ + int next_buff_pos = buff_pos + BATMAN_OGM_LEN + tt_len(tt_num_changes); + + return (next_buff_pos <= packet_len) && + (next_buff_pos <= MAX_AGGREGATION_BYTES); +} + +/* send a batman ogm to a given interface */ +static void bat_ogm_send_to_if(struct forw_packet *forw_packet, + struct hard_iface *hard_iface) +{ + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + char *fwd_str; + uint8_t packet_num; + int16_t buff_pos; + struct batman_ogm_packet *batman_ogm_packet; + struct sk_buff *skb; + + if (hard_iface->if_status != IF_ACTIVE) + return; + + packet_num = 0; + buff_pos = 0; + batman_ogm_packet = (struct batman_ogm_packet *)forw_packet->skb->data; + + /* adjust all flags and log packets */ + while (bat_ogm_aggr_packet(buff_pos, forw_packet->packet_len, + batman_ogm_packet->tt_num_changes)) { + + /* we might have aggregated direct link packets with an + * ordinary base packet */ + if ((forw_packet->direct_link_flags & (1 << packet_num)) && + (forw_packet->if_incoming == hard_iface)) + batman_ogm_packet->flags |= DIRECTLINK; + else + batman_ogm_packet->flags &= ~DIRECTLINK; + + fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ? + "Sending own" : + "Forwarding")); + bat_dbg(DBG_BATMAN, bat_priv, + "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d," + " IDF %s, ttvn %d) on interface %s [%pM]\n", + fwd_str, (packet_num > 0 ? "aggregated " : ""), + batman_ogm_packet->orig, + ntohl(batman_ogm_packet->seqno), + batman_ogm_packet->tq, batman_ogm_packet->ttl, + (batman_ogm_packet->flags & DIRECTLINK ? + "on" : "off"), + batman_ogm_packet->ttvn, hard_iface->net_dev->name, + hard_iface->net_dev->dev_addr); + + buff_pos += BATMAN_OGM_LEN + + tt_len(batman_ogm_packet->tt_num_changes); + packet_num++; + batman_ogm_packet = (struct batman_ogm_packet *) + (forw_packet->skb->data + buff_pos); + } + + /* create clone because function is called more than once */ + skb = skb_clone(forw_packet->skb, GFP_ATOMIC); + if (skb) + send_skb_packet(skb, hard_iface, broadcast_addr); +} + +/* send a batman ogm packet */ +void bat_ogm_emit(struct forw_packet *forw_packet) +{ + struct hard_iface *hard_iface; + struct net_device *soft_iface; + struct bat_priv *bat_priv; + struct hard_iface *primary_if = NULL; + struct batman_ogm_packet *batman_ogm_packet; + unsigned char directlink; + + batman_ogm_packet = (struct batman_ogm_packet *) + (forw_packet->skb->data); + directlink = (batman_ogm_packet->flags & DIRECTLINK ? 1 : 0); + + if (!forw_packet->if_incoming) { + pr_err("Error - can't forward packet: incoming iface not " + "specified\n"); + goto out; + } + + soft_iface = forw_packet->if_incoming->soft_iface; + bat_priv = netdev_priv(soft_iface); + + if (forw_packet->if_incoming->if_status != IF_ACTIVE) + goto out; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* multihomed peer assumed */ + /* non-primary OGMs are only broadcasted on their interface */ + if ((directlink && (batman_ogm_packet->ttl == 1)) || + (forw_packet->own && (forw_packet->if_incoming != primary_if))) { + + /* FIXME: what about aggregated packets ? */ + bat_dbg(DBG_BATMAN, bat_priv, + "%s packet (originator %pM, seqno %d, TTL %d) " + "on interface %s [%pM]\n", + (forw_packet->own ? "Sending own" : "Forwarding"), + batman_ogm_packet->orig, + ntohl(batman_ogm_packet->seqno), + batman_ogm_packet->ttl, + forw_packet->if_incoming->net_dev->name, + forw_packet->if_incoming->net_dev->dev_addr); + + /* skb is only used once and than forw_packet is free'd */ + send_skb_packet(forw_packet->skb, forw_packet->if_incoming, + broadcast_addr); + forw_packet->skb = NULL; + + goto out; + } + + /* broadcast on every interface */ + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) + continue; + + bat_ogm_send_to_if(forw_packet, hard_iface); + } + rcu_read_unlock(); + +out: + if (primary_if) + hardif_free_ref(primary_if); +} + +/* return true if new_packet can be aggregated with forw_packet */ +static bool bat_ogm_can_aggregate(const struct batman_ogm_packet + *new_batman_ogm_packet, + struct bat_priv *bat_priv, + int packet_len, unsigned long send_time, + bool directlink, + const struct hard_iface *if_incoming, + const struct forw_packet *forw_packet) +{ + struct batman_ogm_packet *batman_ogm_packet; + int aggregated_bytes = forw_packet->packet_len + packet_len; + struct hard_iface *primary_if = NULL; + bool res = false; + + batman_ogm_packet = (struct batman_ogm_packet *)forw_packet->skb->data; + + /** + * we can aggregate the current packet to this aggregated packet + * if: + * + * - the send time is within our MAX_AGGREGATION_MS time + * - the resulting packet wont be bigger than + * MAX_AGGREGATION_BYTES + */ + + if (time_before(send_time, forw_packet->send_time) && + time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS), + forw_packet->send_time) && + (aggregated_bytes <= MAX_AGGREGATION_BYTES)) { + + /** + * check aggregation compatibility + * -> direct link packets are broadcasted on + * their interface only + * -> aggregate packet if the current packet is + * a "global" packet as well as the base + * packet + */ + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* packets without direct link flag and high TTL + * are flooded through the net */ + if ((!directlink) && + (!(batman_ogm_packet->flags & DIRECTLINK)) && + (batman_ogm_packet->ttl != 1) && + + /* own packets originating non-primary + * interfaces leave only that interface */ + ((!forw_packet->own) || + (forw_packet->if_incoming == primary_if))) { + res = true; + goto out; + } + + /* if the incoming packet is sent via this one + * interface only - we still can aggregate */ + if ((directlink) && + (new_batman_ogm_packet->ttl == 1) && + (forw_packet->if_incoming == if_incoming) && + + /* packets from direct neighbors or + * own secondary interface packets + * (= secondary interface packets in general) */ + (batman_ogm_packet->flags & DIRECTLINK || + (forw_packet->own && + forw_packet->if_incoming != primary_if))) { + res = true; + goto out; + } + } + +out: + if (primary_if) + hardif_free_ref(primary_if); + return res; +} + +/* create a new aggregated packet and add this packet to it */ +static void bat_ogm_aggregate_new(const unsigned char *packet_buff, + int packet_len, unsigned long send_time, + bool direct_link, + struct hard_iface *if_incoming, + int own_packet) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct forw_packet *forw_packet_aggr; + unsigned char *skb_buff; + + if (!atomic_inc_not_zero(&if_incoming->refcount)) + return; + + /* own packet should always be scheduled */ + if (!own_packet) { + if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) { + bat_dbg(DBG_BATMAN, bat_priv, + "batman packet queue full\n"); + goto out; + } + } + + forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); + if (!forw_packet_aggr) { + if (!own_packet) + atomic_inc(&bat_priv->batman_queue_left); + goto out; + } + + if ((atomic_read(&bat_priv->aggregated_ogms)) && + (packet_len < MAX_AGGREGATION_BYTES)) + forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES + + sizeof(struct ethhdr)); + else + forw_packet_aggr->skb = dev_alloc_skb(packet_len + + sizeof(struct ethhdr)); + + if (!forw_packet_aggr->skb) { + if (!own_packet) + atomic_inc(&bat_priv->batman_queue_left); + kfree(forw_packet_aggr); + goto out; + } + skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr)); + + INIT_HLIST_NODE(&forw_packet_aggr->list); + + skb_buff = skb_put(forw_packet_aggr->skb, packet_len); + forw_packet_aggr->packet_len = packet_len; + memcpy(skb_buff, packet_buff, packet_len); + + forw_packet_aggr->own = own_packet; + forw_packet_aggr->if_incoming = if_incoming; + forw_packet_aggr->num_packets = 0; + forw_packet_aggr->direct_link_flags = NO_FLAGS; + forw_packet_aggr->send_time = send_time; + + /* save packet direct link flag status */ + if (direct_link) + forw_packet_aggr->direct_link_flags |= 1; + + /* add new packet to packet list */ + spin_lock_bh(&bat_priv->forw_bat_list_lock); + hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list); + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + /* start timer for this packet */ + INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, + send_outstanding_bat_ogm_packet); + queue_delayed_work(bat_event_workqueue, + &forw_packet_aggr->delayed_work, + send_time - jiffies); + + return; +out: + hardif_free_ref(if_incoming); +} + +/* aggregate a new packet into the existing ogm packet */ +static void bat_ogm_aggregate(struct forw_packet *forw_packet_aggr, + const unsigned char *packet_buff, + int packet_len, bool direct_link) +{ + unsigned char *skb_buff; + + skb_buff = skb_put(forw_packet_aggr->skb, packet_len); + memcpy(skb_buff, packet_buff, packet_len); + forw_packet_aggr->packet_len += packet_len; + forw_packet_aggr->num_packets++; + + /* save packet direct link flag status */ + if (direct_link) + forw_packet_aggr->direct_link_flags |= + (1 << forw_packet_aggr->num_packets); +} + +static void bat_ogm_queue_add(struct bat_priv *bat_priv, + unsigned char *packet_buff, + int packet_len, struct hard_iface *if_incoming, + int own_packet, unsigned long send_time) +{ + /** + * _aggr -> pointer to the packet we want to aggregate with + * _pos -> pointer to the position in the queue + */ + struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL; + struct hlist_node *tmp_node; + struct batman_ogm_packet *batman_ogm_packet; + bool direct_link; + + batman_ogm_packet = (struct batman_ogm_packet *)packet_buff; + direct_link = batman_ogm_packet->flags & DIRECTLINK ? 1 : 0; + + /* find position for the packet in the forward queue */ + spin_lock_bh(&bat_priv->forw_bat_list_lock); + /* own packets are not to be aggregated */ + if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) { + hlist_for_each_entry(forw_packet_pos, tmp_node, + &bat_priv->forw_bat_list, list) { + if (bat_ogm_can_aggregate(batman_ogm_packet, + bat_priv, packet_len, + send_time, direct_link, + if_incoming, + forw_packet_pos)) { + forw_packet_aggr = forw_packet_pos; + break; + } + } + } + + /* nothing to aggregate with - either aggregation disabled or no + * suitable aggregation packet found */ + if (!forw_packet_aggr) { + /* the following section can run without the lock */ + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + /** + * if we could not aggregate this packet with one of the others + * we hold it back for a while, so that it might be aggregated + * later on + */ + if ((!own_packet) && + (atomic_read(&bat_priv->aggregated_ogms))) + send_time += msecs_to_jiffies(MAX_AGGREGATION_MS); + + bat_ogm_aggregate_new(packet_buff, packet_len, + send_time, direct_link, + if_incoming, own_packet); + } else { + bat_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len, + direct_link); + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + } +} + +static void bat_ogm_forward(struct orig_node *orig_node, + const struct ethhdr *ethhdr, + struct batman_ogm_packet *batman_ogm_packet, + int directlink, struct hard_iface *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct neigh_node *router; + uint8_t in_tq, in_ttl, tq_avg = 0; + uint8_t tt_num_changes; + + if (batman_ogm_packet->ttl <= 1) { + bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n"); + return; + } + + router = orig_node_get_router(orig_node); + + in_tq = batman_ogm_packet->tq; + in_ttl = batman_ogm_packet->ttl; + tt_num_changes = batman_ogm_packet->tt_num_changes; + + batman_ogm_packet->ttl--; + memcpy(batman_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN); + + /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast + * of our best tq value */ + if (router && router->tq_avg != 0) { + + /* rebroadcast ogm of best ranking neighbor as is */ + if (!compare_eth(router->addr, ethhdr->h_source)) { + batman_ogm_packet->tq = router->tq_avg; + + if (router->last_ttl) + batman_ogm_packet->ttl = router->last_ttl - 1; + } + + tq_avg = router->tq_avg; + } + + if (router) + neigh_node_free_ref(router); + + /* apply hop penalty */ + batman_ogm_packet->tq = hop_penalty(batman_ogm_packet->tq, bat_priv); + + bat_dbg(DBG_BATMAN, bat_priv, + "Forwarding packet: tq_orig: %i, tq_avg: %i, " + "tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n", + in_tq, tq_avg, batman_ogm_packet->tq, in_ttl - 1, + batman_ogm_packet->ttl); + + batman_ogm_packet->seqno = htonl(batman_ogm_packet->seqno); + batman_ogm_packet->tt_crc = htons(batman_ogm_packet->tt_crc); + + /* switch of primaries first hop flag when forwarding */ + batman_ogm_packet->flags &= ~PRIMARIES_FIRST_HOP; + if (directlink) + batman_ogm_packet->flags |= DIRECTLINK; + else + batman_ogm_packet->flags &= ~DIRECTLINK; + + bat_ogm_queue_add(bat_priv, (unsigned char *)batman_ogm_packet, + BATMAN_OGM_LEN + tt_len(tt_num_changes), + if_incoming, 0, bat_ogm_fwd_send_time()); +} + +void bat_ogm_schedule(struct hard_iface *hard_iface, int tt_num_changes) +{ + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batman_ogm_packet *batman_ogm_packet; + struct hard_iface *primary_if; + int vis_server; + + vis_server = atomic_read(&bat_priv->vis_mode); + primary_if = primary_if_get_selected(bat_priv); + + batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff; + + /* change sequence number to network order */ + batman_ogm_packet->seqno = + htonl((uint32_t)atomic_read(&hard_iface->seqno)); + + batman_ogm_packet->ttvn = atomic_read(&bat_priv->ttvn); + batman_ogm_packet->tt_crc = htons((uint16_t) + atomic_read(&bat_priv->tt_crc)); + if (tt_num_changes >= 0) + batman_ogm_packet->tt_num_changes = tt_num_changes; + + if (vis_server == VIS_TYPE_SERVER_SYNC) + batman_ogm_packet->flags |= VIS_SERVER; + else + batman_ogm_packet->flags &= ~VIS_SERVER; + + if ((hard_iface == primary_if) && + (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)) + batman_ogm_packet->gw_flags = + (uint8_t)atomic_read(&bat_priv->gw_bandwidth); + else + batman_ogm_packet->gw_flags = NO_FLAGS; + + atomic_inc(&hard_iface->seqno); + + slide_own_bcast_window(hard_iface); + bat_ogm_queue_add(bat_priv, hard_iface->packet_buff, + hard_iface->packet_len, hard_iface, 1, + bat_ogm_emit_send_time(bat_priv)); + + if (primary_if) + hardif_free_ref(primary_if); +} + +static void bat_ogm_orig_update(struct bat_priv *bat_priv, + struct orig_node *orig_node, + const struct ethhdr *ethhdr, + const struct batman_ogm_packet + *batman_ogm_packet, + struct hard_iface *if_incoming, + const unsigned char *tt_buff, int is_duplicate) +{ + struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + struct neigh_node *router = NULL; + struct orig_node *orig_node_tmp; + struct hlist_node *node; + uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; + + bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " + "Searching and updating originator entry of received packet\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { + if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && + (tmp_neigh_node->if_incoming == if_incoming) && + atomic_inc_not_zero(&tmp_neigh_node->refcount)) { + if (neigh_node) + neigh_node_free_ref(neigh_node); + neigh_node = tmp_neigh_node; + continue; + } + + if (is_duplicate) + continue; + + spin_lock_bh(&tmp_neigh_node->tq_lock); + ring_buffer_set(tmp_neigh_node->tq_recv, + &tmp_neigh_node->tq_index, 0); + tmp_neigh_node->tq_avg = + ring_buffer_avg(tmp_neigh_node->tq_recv); + spin_unlock_bh(&tmp_neigh_node->tq_lock); + } + + if (!neigh_node) { + struct orig_node *orig_tmp; + + orig_tmp = get_orig_node(bat_priv, ethhdr->h_source); + if (!orig_tmp) + goto unlock; + + neigh_node = create_neighbor(orig_node, orig_tmp, + ethhdr->h_source, if_incoming); + + orig_node_free_ref(orig_tmp); + if (!neigh_node) + goto unlock; + } else + bat_dbg(DBG_BATMAN, bat_priv, + "Updating existing last-hop neighbor of originator\n"); + + rcu_read_unlock(); + + orig_node->flags = batman_ogm_packet->flags; + neigh_node->last_valid = jiffies; + + spin_lock_bh(&neigh_node->tq_lock); + ring_buffer_set(neigh_node->tq_recv, + &neigh_node->tq_index, + batman_ogm_packet->tq); + neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv); + spin_unlock_bh(&neigh_node->tq_lock); + + if (!is_duplicate) { + orig_node->last_ttl = batman_ogm_packet->ttl; + neigh_node->last_ttl = batman_ogm_packet->ttl; + } + + bonding_candidate_add(orig_node, neigh_node); + + /* if this neighbor already is our next hop there is nothing + * to change */ + router = orig_node_get_router(orig_node); + if (router == neigh_node) + goto update_tt; + + /* if this neighbor does not offer a better TQ we won't consider it */ + if (router && (router->tq_avg > neigh_node->tq_avg)) + goto update_tt; + + /* if the TQ is the same and the link not more symmetric we + * won't consider it either */ + if (router && (neigh_node->tq_avg == router->tq_avg)) { + orig_node_tmp = router->orig_node; + spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + bcast_own_sum_orig = + orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + + orig_node_tmp = neigh_node->orig_node; + spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + bcast_own_sum_neigh = + orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + + if (bcast_own_sum_orig >= bcast_own_sum_neigh) + goto update_tt; + } + + update_route(bat_priv, orig_node, neigh_node); + +update_tt: + /* I have to check for transtable changes only if the OGM has been + * sent through a primary interface */ + if (((batman_ogm_packet->orig != ethhdr->h_source) && + (batman_ogm_packet->ttl > 2)) || + (batman_ogm_packet->flags & PRIMARIES_FIRST_HOP)) + tt_update_orig(bat_priv, orig_node, tt_buff, + batman_ogm_packet->tt_num_changes, + batman_ogm_packet->ttvn, + batman_ogm_packet->tt_crc); + + if (orig_node->gw_flags != batman_ogm_packet->gw_flags) + gw_node_update(bat_priv, orig_node, + batman_ogm_packet->gw_flags); + + orig_node->gw_flags = batman_ogm_packet->gw_flags; + + /* restart gateway selection if fast or late switching was enabled */ + if ((orig_node->gw_flags) && + (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) && + (atomic_read(&bat_priv->gw_sel_class) > 2)) + gw_check_election(bat_priv, orig_node); + + goto out; + +unlock: + rcu_read_unlock(); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (router) + neigh_node_free_ref(router); +} + +static int bat_ogm_calc_tq(struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + struct batman_ogm_packet *batman_ogm_packet, + struct hard_iface *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct neigh_node *neigh_node = NULL, *tmp_neigh_node; + struct hlist_node *node; + uint8_t total_count; + uint8_t orig_eq_count, neigh_rq_count, tq_own; + int tq_asym_penalty, ret = 0; + + /* find corresponding one hop neighbor */ + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_neigh_node->neigh_list, list) { + + if (!compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) + continue; + + if (tmp_neigh_node->if_incoming != if_incoming) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; + + neigh_node = tmp_neigh_node; + break; + } + rcu_read_unlock(); + + if (!neigh_node) + neigh_node = create_neighbor(orig_neigh_node, + orig_neigh_node, + orig_neigh_node->orig, + if_incoming); + + if (!neigh_node) + goto out; + + /* if orig_node is direct neighbor update neigh_node last_valid */ + if (orig_node == orig_neigh_node) + neigh_node->last_valid = jiffies; + + orig_node->last_valid = jiffies; + + /* find packet count of corresponding one hop neighbor */ + spin_lock_bh(&orig_node->ogm_cnt_lock); + orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num]; + neigh_rq_count = neigh_node->real_packet_count; + spin_unlock_bh(&orig_node->ogm_cnt_lock); + + /* pay attention to not get a value bigger than 100 % */ + total_count = (orig_eq_count > neigh_rq_count ? + neigh_rq_count : orig_eq_count); + + /* if we have too few packets (too less data) we set tq_own to zero */ + /* if we receive too few packets it is not considered bidirectional */ + if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) || + (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) + tq_own = 0; + else + /* neigh_node->real_packet_count is never zero as we + * only purge old information when getting new + * information */ + tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count; + + /* + * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does + * affect the nearly-symmetric links only a little, but + * punishes asymmetric links more. This will give a value + * between 0 and TQ_MAX_VALUE + */ + tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) / + (TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE); + + batman_ogm_packet->tq = ((batman_ogm_packet->tq * tq_own + * tq_asym_penalty) / + (TQ_MAX_VALUE * TQ_MAX_VALUE)); + + bat_dbg(DBG_BATMAN, bat_priv, + "bidirectional: " + "orig = %-15pM neigh = %-15pM => own_bcast = %2i, " + "real recv = %2i, local tq: %3i, asym_penalty: %3i, " + "total tq: %3i\n", + orig_node->orig, orig_neigh_node->orig, total_count, + neigh_rq_count, tq_own, tq_asym_penalty, batman_ogm_packet->tq); + + /* if link has the minimum required transmission quality + * consider it bidirectional */ + if (batman_ogm_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) + ret = 1; + +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + return ret; +} + +/* processes a batman packet for all interfaces, adjusts the sequence number and + * finds out whether it is a duplicate. + * returns: + * 1 the packet is a duplicate + * 0 the packet has not yet been received + * -1 the packet is old and has been received while the seqno window + * was protected. Caller should drop it. + */ +static int bat_ogm_update_seqnos(const struct ethhdr *ethhdr, + const struct batman_ogm_packet + *batman_ogm_packet, + const struct hard_iface *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct orig_node *orig_node; + struct neigh_node *tmp_neigh_node; + struct hlist_node *node; + int is_duplicate = 0; + int32_t seq_diff; + int need_update = 0; + int set_mark, ret = -1; + + orig_node = get_orig_node(bat_priv, batman_ogm_packet->orig); + if (!orig_node) + return 0; + + spin_lock_bh(&orig_node->ogm_cnt_lock); + seq_diff = batman_ogm_packet->seqno - orig_node->last_real_seqno; + + /* signalize caller that the packet is to be dropped. */ + if (window_protected(bat_priv, seq_diff, + &orig_node->batman_seqno_reset)) + goto out; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { + + is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, + orig_node->last_real_seqno, + batman_ogm_packet->seqno); + + if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && + (tmp_neigh_node->if_incoming == if_incoming)) + set_mark = 1; + else + set_mark = 0; + + /* if the window moved, set the update flag. */ + need_update |= bit_get_packet(bat_priv, + tmp_neigh_node->real_bits, + seq_diff, set_mark); + + tmp_neigh_node->real_packet_count = + bit_packet_count(tmp_neigh_node->real_bits); + } + rcu_read_unlock(); + + if (need_update) { + bat_dbg(DBG_BATMAN, bat_priv, + "updating last_seqno: old %d, new %d\n", + orig_node->last_real_seqno, batman_ogm_packet->seqno); + orig_node->last_real_seqno = batman_ogm_packet->seqno; + } + + ret = is_duplicate; + +out: + spin_unlock_bh(&orig_node->ogm_cnt_lock); + orig_node_free_ref(orig_node); + return ret; +} + +static void bat_ogm_process(const struct ethhdr *ethhdr, + struct batman_ogm_packet *batman_ogm_packet, + const unsigned char *tt_buff, + struct hard_iface *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct hard_iface *hard_iface; + struct orig_node *orig_neigh_node, *orig_node; + struct neigh_node *router = NULL, *router_router = NULL; + struct neigh_node *orig_neigh_router = NULL; + int has_directlink_flag; + int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; + int is_broadcast = 0, is_bidirectional, is_single_hop_neigh; + int is_duplicate; + uint32_t if_incoming_seqno; + + /* Silently drop when the batman packet is actually not a + * correct packet. + * + * This might happen if a packet is padded (e.g. Ethernet has a + * minimum frame length of 64 byte) and the aggregation interprets + * it as an additional length. + * + * TODO: A more sane solution would be to have a bit in the + * batman_ogm_packet to detect whether the packet is the last + * packet in an aggregation. Here we expect that the padding + * is always zero (or not 0x01) + */ + if (batman_ogm_packet->packet_type != BAT_OGM) + return; + + /* could be changed by schedule_own_packet() */ + if_incoming_seqno = atomic_read(&if_incoming->seqno); + + has_directlink_flag = (batman_ogm_packet->flags & DIRECTLINK ? 1 : 0); + + is_single_hop_neigh = (compare_eth(ethhdr->h_source, + batman_ogm_packet->orig) ? 1 : 0); + + bat_dbg(DBG_BATMAN, bat_priv, + "Received BATMAN packet via NB: %pM, IF: %s [%pM] " + "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, " + "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", + ethhdr->h_source, if_incoming->net_dev->name, + if_incoming->net_dev->dev_addr, batman_ogm_packet->orig, + batman_ogm_packet->prev_sender, batman_ogm_packet->seqno, + batman_ogm_packet->ttvn, batman_ogm_packet->tt_crc, + batman_ogm_packet->tt_num_changes, batman_ogm_packet->tq, + batman_ogm_packet->ttl, batman_ogm_packet->version, + has_directlink_flag); + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->if_status != IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != if_incoming->soft_iface) + continue; + + if (compare_eth(ethhdr->h_source, + hard_iface->net_dev->dev_addr)) + is_my_addr = 1; + + if (compare_eth(batman_ogm_packet->orig, + hard_iface->net_dev->dev_addr)) + is_my_orig = 1; + + if (compare_eth(batman_ogm_packet->prev_sender, + hard_iface->net_dev->dev_addr)) + is_my_oldorig = 1; + + if (is_broadcast_ether_addr(ethhdr->h_source)) + is_broadcast = 1; + } + rcu_read_unlock(); + + if (batman_ogm_packet->version != COMPAT_VERSION) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: incompatible batman version (%i)\n", + batman_ogm_packet->version); + return; + } + + if (is_my_addr) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: received my own broadcast (sender: %pM" + ")\n", + ethhdr->h_source); + return; + } + + if (is_broadcast) { + bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " + "ignoring all packets with broadcast source addr (sender: %pM" + ")\n", ethhdr->h_source); + return; + } + + if (is_my_orig) { + unsigned long *word; + int offset; + + orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source); + if (!orig_neigh_node) + return; + + /* neighbor has to indicate direct link and it has to + * come via the corresponding interface */ + /* save packet seqno for bidirectional check */ + if (has_directlink_flag && + compare_eth(if_incoming->net_dev->dev_addr, + batman_ogm_packet->orig)) { + offset = if_incoming->if_num * NUM_WORDS; + + spin_lock_bh(&orig_neigh_node->ogm_cnt_lock); + word = &(orig_neigh_node->bcast_own[offset]); + bit_mark(word, + if_incoming_seqno - + batman_ogm_packet->seqno - 2); + orig_neigh_node->bcast_own_sum[if_incoming->if_num] = + bit_packet_count(word); + spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); + } + + bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " + "originator packet from myself (via neighbor)\n"); + orig_node_free_ref(orig_neigh_node); + return; + } + + if (is_my_oldorig) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: ignoring all rebroadcast echos (sender: " + "%pM)\n", ethhdr->h_source); + return; + } + + orig_node = get_orig_node(bat_priv, batman_ogm_packet->orig); + if (!orig_node) + return; + + is_duplicate = bat_ogm_update_seqnos(ethhdr, batman_ogm_packet, + if_incoming); + + if (is_duplicate == -1) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: packet within seqno protection time " + "(sender: %pM)\n", ethhdr->h_source); + goto out; + } + + if (batman_ogm_packet->tq == 0) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: originator packet with tq equal 0\n"); + goto out; + } + + router = orig_node_get_router(orig_node); + if (router) + router_router = orig_node_get_router(router->orig_node); + + /* avoid temporary routing loops */ + if (router && router_router && + (compare_eth(router->addr, batman_ogm_packet->prev_sender)) && + !(compare_eth(batman_ogm_packet->orig, + batman_ogm_packet->prev_sender)) && + (compare_eth(router->addr, router_router->addr))) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: ignoring all rebroadcast packets that " + "may make me loop (sender: %pM)\n", ethhdr->h_source); + goto out; + } + + /* if sender is a direct neighbor the sender mac equals + * originator mac */ + orig_neigh_node = (is_single_hop_neigh ? + orig_node : + get_orig_node(bat_priv, ethhdr->h_source)); + if (!orig_neigh_node) + goto out; + + orig_neigh_router = orig_node_get_router(orig_neigh_node); + + /* drop packet if sender is not a direct neighbor and if we + * don't route towards it */ + if (!is_single_hop_neigh && (!orig_neigh_router)) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: OGM via unknown neighbor!\n"); + goto out_neigh; + } + + is_bidirectional = bat_ogm_calc_tq(orig_node, orig_neigh_node, + batman_ogm_packet, if_incoming); + + bonding_save_primary(orig_node, orig_neigh_node, batman_ogm_packet); + + /* update ranking if it is not a duplicate or has the same + * seqno and similar ttl as the non-duplicate */ + if (is_bidirectional && + (!is_duplicate || + ((orig_node->last_real_seqno == batman_ogm_packet->seqno) && + (orig_node->last_ttl - 3 <= batman_ogm_packet->ttl)))) + bat_ogm_orig_update(bat_priv, orig_node, ethhdr, + batman_ogm_packet, if_incoming, + tt_buff, is_duplicate); + + /* is single hop (direct) neighbor */ + if (is_single_hop_neigh) { + + /* mark direct link on incoming interface */ + bat_ogm_forward(orig_node, ethhdr, batman_ogm_packet, + 1, if_incoming); + + bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " + "rebroadcast neighbor packet with direct link flag\n"); + goto out_neigh; + } + + /* multihop originator */ + if (!is_bidirectional) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: not received via bidirectional link\n"); + goto out_neigh; + } + + if (is_duplicate) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: duplicate packet received\n"); + goto out_neigh; + } + + bat_dbg(DBG_BATMAN, bat_priv, + "Forwarding packet: rebroadcast originator packet\n"); + bat_ogm_forward(orig_node, ethhdr, batman_ogm_packet, 0, if_incoming); + +out_neigh: + if ((orig_neigh_node) && (!is_single_hop_neigh)) + orig_node_free_ref(orig_neigh_node); +out: + if (router) + neigh_node_free_ref(router); + if (router_router) + neigh_node_free_ref(router_router); + if (orig_neigh_router) + neigh_node_free_ref(orig_neigh_router); + + orig_node_free_ref(orig_node); +} + +void bat_ogm_receive(const struct ethhdr *ethhdr, unsigned char *packet_buff, + int packet_len, struct hard_iface *if_incoming) +{ + struct batman_ogm_packet *batman_ogm_packet; + int buff_pos = 0; + unsigned char *tt_buff; + + batman_ogm_packet = (struct batman_ogm_packet *)packet_buff; + + /* unpack the aggregated packets and process them one by one */ + do { + /* network to host order for our 32bit seqno and the + orig_interval */ + batman_ogm_packet->seqno = ntohl(batman_ogm_packet->seqno); + batman_ogm_packet->tt_crc = ntohs(batman_ogm_packet->tt_crc); + + tt_buff = packet_buff + buff_pos + BATMAN_OGM_LEN; + + bat_ogm_process(ethhdr, batman_ogm_packet, + tt_buff, if_incoming); + + buff_pos += BATMAN_OGM_LEN + + tt_len(batman_ogm_packet->tt_num_changes); + + batman_ogm_packet = (struct batman_ogm_packet *) + (packet_buff + buff_pos); + } while (bat_ogm_aggr_packet(buff_pos, packet_len, + batman_ogm_packet->tt_num_changes)); +} diff --git a/net/batman-adv/bat_ogm.h b/net/batman-adv/bat_ogm.h new file mode 100644 index 000000000000..69329c107e28 --- /dev/null +++ b/net/batman-adv/bat_ogm.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_OGM_H_ +#define _NET_BATMAN_ADV_OGM_H_ + +#include "main.h" + +void bat_ogm_init(struct hard_iface *hard_iface); +void bat_ogm_init_primary(struct hard_iface *hard_iface); +void bat_ogm_update_mac(struct hard_iface *hard_iface); +void bat_ogm_schedule(struct hard_iface *hard_iface, int tt_num_changes); +void bat_ogm_emit(struct forw_packet *forw_packet); +void bat_ogm_receive(const struct ethhdr *ethhdr, unsigned char *packet_buff, + int packet_len, struct hard_iface *if_incoming); + +#endif /* _NET_BATMAN_ADV_OGM_H_ */ diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index cd15deba60a1..c25492f7d665 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -174,7 +174,7 @@ static int store_uint_attr(const char *buff, size_t count, unsigned long uint_val; int ret; - ret = strict_strtoul(buff, 10, &uint_val); + ret = kstrtoul(buff, 10, &uint_val); if (ret) { bat_info(net_dev, "%s: Invalid parameter received: %s\n", @@ -239,7 +239,7 @@ static ssize_t store_vis_mode(struct kobject *kobj, struct attribute *attr, unsigned long val; int ret, vis_mode_tmp = -1; - ret = strict_strtoul(buff, 10, &val); + ret = kstrtoul(buff, 10, &val); if (((count == 2) && (!ret) && (val == VIS_TYPE_CLIENT_UPDATE)) || (strncmp(buff, "client", 6) == 0) || @@ -380,6 +380,7 @@ static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr, BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu); +BAT_ATTR_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode); static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode); BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL); @@ -396,6 +397,7 @@ static struct bat_attribute *mesh_attrs[] = { &bat_attr_aggregated_ogms, &bat_attr_bonding, &bat_attr_fragmentation, + &bat_attr_ap_isolation, &bat_attr_vis_mode, &bat_attr_gw_mode, &bat_attr_orig_interval, diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index c1f4bfc09cc3..9bc63b209b3f 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -97,12 +97,12 @@ static void bit_shift(unsigned long *seq_bits, int32_t n) (seq_bits[i - word_num - 1] >> (WORD_BIT_SIZE-word_offset)); /* and the upper part of the right half and shift it left to - * it's position */ + * its position */ /* for our example that would be: word[0] = 9800 + 0076 = * 9876 */ } - /* now for our last word, i==word_num, we only have the it's "left" - * half. that's the 1000 word in our example.*/ + /* now for our last word, i==word_num, we only have its "left" half. + * that's the 1000 word in our example.*/ seq_bits[i] = (seq_bits[i - word_num] << word_offset); @@ -155,7 +155,7 @@ int bit_get_packet(void *priv, unsigned long *seq_bits, /* sequence number is much newer, probably missed a lot of packets */ if ((seq_num_diff >= TQ_LOCAL_WINDOW_SIZE) - || (seq_num_diff < EXPECTED_SEQNO_RANGE)) { + && (seq_num_diff < EXPECTED_SEQNO_RANGE)) { bat_dbg(DBG_BATMAN, bat_priv, "We missed a lot of packets (%i) !\n", seq_num_diff - 1); diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 056180ef9e1a..24403a7350f7 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -25,6 +25,7 @@ #include "gateway_common.h" #include "hard-interface.h" #include "originator.h" +#include "translation-table.h" #include "routing.h" #include <linux/ip.h> #include <linux/ipv6.h> @@ -532,14 +533,14 @@ static bool is_type_dhcprequest(struct sk_buff *skb, int header_len) pkt_len -= header_len + DHCP_OPTIONS_OFFSET + 1; /* Access the dhcp option lists. Each entry is made up by: - * - octect 1: option type - * - octect 2: option data len (only if type != 255 and 0) - * - octect 3: option data */ + * - octet 1: option type + * - octet 2: option data len (only if type != 255 and 0) + * - octet 3: option data */ while (*p != 255 && !ret) { - /* p now points to the first octect: option type */ + /* p now points to the first octet: option type */ if (*p == 53) { /* type 53 is the message type option. - * Jump the len octect and go to the data octect */ + * Jump the len octet and go to the data octet */ if (pkt_len < 2) goto out; p += 2; @@ -572,108 +573,142 @@ out: return ret; } -int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb, - struct orig_node *old_gw) +bool gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len) { struct ethhdr *ethhdr; struct iphdr *iphdr; struct ipv6hdr *ipv6hdr; struct udphdr *udphdr; - struct gw_node *curr_gw; - struct neigh_node *neigh_curr = NULL, *neigh_old = NULL; - unsigned int header_len = 0; - int ret = 1; - - if (atomic_read(&bat_priv->gw_mode) == GW_MODE_OFF) - return 0; /* check for ethernet header */ - if (!pskb_may_pull(skb, header_len + ETH_HLEN)) - return 0; + if (!pskb_may_pull(skb, *header_len + ETH_HLEN)) + return false; ethhdr = (struct ethhdr *)skb->data; - header_len += ETH_HLEN; + *header_len += ETH_HLEN; /* check for initial vlan header */ if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { - if (!pskb_may_pull(skb, header_len + VLAN_HLEN)) - return 0; + if (!pskb_may_pull(skb, *header_len + VLAN_HLEN)) + return false; ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN); - header_len += VLAN_HLEN; + *header_len += VLAN_HLEN; } /* check for ip header */ switch (ntohs(ethhdr->h_proto)) { case ETH_P_IP: - if (!pskb_may_pull(skb, header_len + sizeof(*iphdr))) - return 0; - iphdr = (struct iphdr *)(skb->data + header_len); - header_len += iphdr->ihl * 4; + if (!pskb_may_pull(skb, *header_len + sizeof(*iphdr))) + return false; + iphdr = (struct iphdr *)(skb->data + *header_len); + *header_len += iphdr->ihl * 4; /* check for udp header */ if (iphdr->protocol != IPPROTO_UDP) - return 0; + return false; break; case ETH_P_IPV6: - if (!pskb_may_pull(skb, header_len + sizeof(*ipv6hdr))) - return 0; - ipv6hdr = (struct ipv6hdr *)(skb->data + header_len); - header_len += sizeof(*ipv6hdr); + if (!pskb_may_pull(skb, *header_len + sizeof(*ipv6hdr))) + return false; + ipv6hdr = (struct ipv6hdr *)(skb->data + *header_len); + *header_len += sizeof(*ipv6hdr); /* check for udp header */ if (ipv6hdr->nexthdr != IPPROTO_UDP) - return 0; + return false; break; default: - return 0; + return false; } - if (!pskb_may_pull(skb, header_len + sizeof(*udphdr))) - return 0; - udphdr = (struct udphdr *)(skb->data + header_len); - header_len += sizeof(*udphdr); + if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr))) + return false; + udphdr = (struct udphdr *)(skb->data + *header_len); + *header_len += sizeof(*udphdr); /* check for bootp port */ if ((ntohs(ethhdr->h_proto) == ETH_P_IP) && (ntohs(udphdr->dest) != 67)) - return 0; + return false; if ((ntohs(ethhdr->h_proto) == ETH_P_IPV6) && (ntohs(udphdr->dest) != 547)) - return 0; + return false; - if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER) - return -1; + return true; +} - curr_gw = gw_get_selected_gw_node(bat_priv); - if (!curr_gw) - return 0; - - /* If old_gw != NULL then this packet is unicast. - * So, at this point we have to check the message type: if it is a - * DHCPREQUEST we have to decide whether to drop it or not */ - if (old_gw && curr_gw->orig_node != old_gw) { - if (is_type_dhcprequest(skb, header_len)) { - /* If the dhcp packet has been sent to a different gw, - * we have to evaluate whether the old gw is still - * reliable enough */ - neigh_curr = find_router(bat_priv, curr_gw->orig_node, - NULL); - neigh_old = find_router(bat_priv, old_gw, NULL); - if (!neigh_curr || !neigh_old) - goto free_neigh; - if (neigh_curr->tq_avg - neigh_old->tq_avg < - GW_THRESHOLD) - ret = -1; - } +bool gw_out_of_range(struct bat_priv *bat_priv, + struct sk_buff *skb, struct ethhdr *ethhdr) +{ + struct neigh_node *neigh_curr = NULL, *neigh_old = NULL; + struct orig_node *orig_dst_node = NULL; + struct gw_node *curr_gw = NULL; + bool ret, out_of_range = false; + unsigned int header_len = 0; + uint8_t curr_tq_avg; + + ret = gw_is_dhcp_target(skb, &header_len); + if (!ret) + goto out; + + orig_dst_node = transtable_search(bat_priv, ethhdr->h_source, + ethhdr->h_dest); + if (!orig_dst_node) + goto out; + + if (!orig_dst_node->gw_flags) + goto out; + + ret = is_type_dhcprequest(skb, header_len); + if (!ret) + goto out; + + switch (atomic_read(&bat_priv->gw_mode)) { + case GW_MODE_SERVER: + /* If we are a GW then we are our best GW. We can artificially + * set the tq towards ourself as the maximum value */ + curr_tq_avg = TQ_MAX_VALUE; + break; + case GW_MODE_CLIENT: + curr_gw = gw_get_selected_gw_node(bat_priv); + if (!curr_gw) + goto out; + + /* packet is going to our gateway */ + if (curr_gw->orig_node == orig_dst_node) + goto out; + + /* If the dhcp packet has been sent to a different gw, + * we have to evaluate whether the old gw is still + * reliable enough */ + neigh_curr = find_router(bat_priv, curr_gw->orig_node, NULL); + if (!neigh_curr) + goto out; + + curr_tq_avg = neigh_curr->tq_avg; + break; + case GW_MODE_OFF: + default: + goto out; } -free_neigh: + + neigh_old = find_router(bat_priv, orig_dst_node, NULL); + if (!neigh_old) + goto out; + + if (curr_tq_avg - neigh_old->tq_avg > GW_THRESHOLD) + out_of_range = true; + +out: + if (orig_dst_node) + orig_node_free_ref(orig_dst_node); + if (curr_gw) + gw_node_free_ref(curr_gw); if (neigh_old) neigh_node_free_ref(neigh_old); if (neigh_curr) neigh_node_free_ref(neigh_curr); - if (curr_gw) - gw_node_free_ref(curr_gw); - return ret; + return out_of_range; } diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index b9b983c07feb..e1edba08eb1d 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -31,7 +31,8 @@ void gw_node_update(struct bat_priv *bat_priv, void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node); void gw_node_purge(struct bat_priv *bat_priv); int gw_client_seq_print_text(struct seq_file *seq, void *offset); -int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb, - struct orig_node *old_gw); +bool gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len); +bool gw_out_of_range(struct bat_priv *bat_priv, + struct sk_buff *skb, struct ethhdr *ethhdr); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 18661af0bc3b..c4ac7b0a2a63 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -97,7 +97,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = strict_strtol(buff, 10, &ldown); + ret = kstrtol(buff, 10, &ldown); if (ret) { bat_err(net_dev, "Download speed of gateway mode invalid: %s\n", @@ -122,7 +122,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = strict_strtol(slash_ptr + 1, 10, &lup); + ret = kstrtol(slash_ptr + 1, 10, &lup); if (ret) { bat_err(net_dev, "Upload speed of gateway mode invalid: " diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index db7aacf1e095..7704df468e0b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -28,6 +28,7 @@ #include "bat_sysfs.h" #include "originator.h" #include "hash.h" +#include "bat_ogm.h" #include <linux/if_arp.h> @@ -131,7 +132,6 @@ static void primary_if_select(struct bat_priv *bat_priv, struct hard_iface *new_hard_iface) { struct hard_iface *curr_hard_iface; - struct batman_packet *batman_packet; ASSERT_RTNL(); @@ -147,10 +147,7 @@ static void primary_if_select(struct bat_priv *bat_priv, if (!new_hard_iface) return; - batman_packet = (struct batman_packet *)(new_hard_iface->packet_buff); - batman_packet->flags = PRIMARIES_FIRST_HOP; - batman_packet->ttl = TTL; - + bat_ogm_init_primary(new_hard_iface); primary_if_update_addr(bat_priv); } @@ -162,14 +159,6 @@ static bool hardif_is_iface_up(const struct hard_iface *hard_iface) return false; } -static void update_mac_addresses(struct hard_iface *hard_iface) -{ - memcpy(((struct batman_packet *)(hard_iface->packet_buff))->orig, - hard_iface->net_dev->dev_addr, ETH_ALEN); - memcpy(((struct batman_packet *)(hard_iface->packet_buff))->prev_sender, - hard_iface->net_dev->dev_addr, ETH_ALEN); -} - static void check_known_mac_addr(const struct net_device *net_dev) { const struct hard_iface *hard_iface; @@ -244,12 +233,12 @@ static void hardif_activate_interface(struct hard_iface *hard_iface) bat_priv = netdev_priv(hard_iface->soft_iface); - update_mac_addresses(hard_iface); + bat_ogm_update_mac(hard_iface); hard_iface->if_status = IF_TO_BE_ACTIVATED; /** * the first active interface becomes our primary interface or - * the next active interface after the old primay interface was removed + * the next active interface after the old primary interface was removed */ primary_if = primary_if_get_selected(bat_priv); if (!primary_if) @@ -283,7 +272,6 @@ int hardif_enable_interface(struct hard_iface *hard_iface, const char *iface_name) { struct bat_priv *bat_priv; - struct batman_packet *batman_packet; struct net_device *soft_iface; int ret; @@ -318,8 +306,8 @@ int hardif_enable_interface(struct hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); - hard_iface->packet_len = BAT_PACKET_LEN; - hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); + + bat_ogm_init(hard_iface); if (!hard_iface->packet_buff) { bat_err(hard_iface->soft_iface, "Can't add interface packet " @@ -328,15 +316,6 @@ int hardif_enable_interface(struct hard_iface *hard_iface, goto err; } - batman_packet = (struct batman_packet *)(hard_iface->packet_buff); - batman_packet->packet_type = BAT_PACKET; - batman_packet->version = COMPAT_VERSION; - batman_packet->flags = NO_FLAGS; - batman_packet->ttl = 2; - batman_packet->tq = TQ_MAX_VALUE; - batman_packet->tt_num_changes = 0; - batman_packet->ttvn = 0; - hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; hard_iface->if_status = IF_INACTIVE; @@ -381,7 +360,7 @@ int hardif_enable_interface(struct hard_iface *hard_iface, hard_iface->net_dev->name); /* begin scheduling originator messages on that interface */ - schedule_own_packet(hard_iface); + schedule_bat_ogm(hard_iface); out: return 0; @@ -455,11 +434,8 @@ static struct hard_iface *hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); - if (!hard_iface) { - pr_err("Can't add interface (%s): out of memory\n", - net_dev->name); + if (!hard_iface) goto release_dev; - } ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev); if (ret) @@ -551,7 +527,7 @@ static int hard_if_event(struct notifier_block *this, goto hardif_put; check_known_mac_addr(hard_iface->net_dev); - update_mac_addresses(hard_iface); + bat_ogm_update_mac(hard_iface); bat_priv = netdev_priv(hard_iface->soft_iface); primary_if = primary_if_get_selected(bat_priv); @@ -573,14 +549,14 @@ out: return NOTIFY_DONE; } -/* receive a packet with the batman ethertype coming on a hard +/* incoming packets with the batman ethertype received on any active hard * interface */ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct bat_priv *bat_priv; - struct batman_packet *batman_packet; + struct batman_ogm_packet *batman_ogm_packet; struct hard_iface *hard_iface; int ret; @@ -612,22 +588,22 @@ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, if (hard_iface->if_status != IF_ACTIVE) goto err_free; - batman_packet = (struct batman_packet *)skb->data; + batman_ogm_packet = (struct batman_ogm_packet *)skb->data; - if (batman_packet->version != COMPAT_VERSION) { + if (batman_ogm_packet->version != COMPAT_VERSION) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: incompatible batman version (%i)\n", - batman_packet->version); + batman_ogm_packet->version); goto err_free; } /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. */ - switch (batman_packet->packet_type) { + switch (batman_ogm_packet->packet_type) { /* batman originator packet */ - case BAT_PACKET: - ret = recv_bat_packet(skb, hard_iface); + case BAT_OGM: + ret = recv_bat_ogm_packet(skb, hard_iface); break; /* batman icmp packet */ @@ -681,6 +657,36 @@ err_out: return NET_RX_DROP; } +/* This function returns true if the interface represented by ifindex is a + * 802.11 wireless device */ +bool is_wifi_iface(int ifindex) +{ + struct net_device *net_device = NULL; + bool ret = false; + + if (ifindex == NULL_IFINDEX) + goto out; + + net_device = dev_get_by_index(&init_net, ifindex); + if (!net_device) + goto out; + +#ifdef CONFIG_WIRELESS_EXT + /* pre-cfg80211 drivers have to implement WEXT, so it is possible to + * check for wireless_handlers != NULL */ + if (net_device->wireless_handlers) + ret = true; + else +#endif + /* cfg80211 drivers have to set ieee80211_ptr */ + if (net_device->ieee80211_ptr) + ret = true; +out: + if (net_device) + dev_put(net_device); + return ret; +} + struct notifier_block hard_if_notifier = { .notifier_call = hard_if_event, }; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 442eacbc9e3a..67f78d1a63b4 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -42,6 +42,7 @@ void hardif_remove_interfaces(void); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); void hardif_free_rcu(struct rcu_head *rcu); +bool is_wifi_iface(int ifindex); static inline void hardif_free_ref(struct hard_iface *hard_iface) { diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 2a172505f513..d1da29da333b 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -25,7 +25,7 @@ /* clears the hash */ static void hash_init(struct hashtable_t *hash) { - int i; + uint32_t i; for (i = 0 ; i < hash->size; i++) { INIT_HLIST_HEAD(&hash->table[i]); @@ -42,7 +42,7 @@ void hash_destroy(struct hashtable_t *hash) } /* allocates and clears the hash */ -struct hashtable_t *hash_new(int size) +struct hashtable_t *hash_new(uint32_t size) { struct hashtable_t *hash; diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index dd5c9fd7a905..4768717f07f9 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -33,17 +33,17 @@ typedef int (*hashdata_compare_cb)(const struct hlist_node *, const void *); /* the hashfunction, should return an index * based on the key in the data of the first * argument and the size the second */ -typedef int (*hashdata_choose_cb)(const void *, int); +typedef uint32_t (*hashdata_choose_cb)(const void *, uint32_t); typedef void (*hashdata_free_cb)(struct hlist_node *, void *); struct hashtable_t { struct hlist_head *table; /* the hashtable itself with the buckets */ spinlock_t *list_locks; /* spinlock for each hash list entry */ - int size; /* size of hashtable */ + uint32_t size; /* size of hashtable */ }; /* allocates and clears the hash */ -struct hashtable_t *hash_new(int size); +struct hashtable_t *hash_new(uint32_t size); /* free only the hashtable and the hash itself. */ void hash_destroy(struct hashtable_t *hash); @@ -57,7 +57,7 @@ static inline void hash_delete(struct hashtable_t *hash, struct hlist_head *head; struct hlist_node *node, *node_tmp; spinlock_t *list_lock; /* spinlock to protect write access */ - int i; + uint32_t i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -76,19 +76,31 @@ static inline void hash_delete(struct hashtable_t *hash, hash_destroy(hash); } -/* adds data to the hashtable. returns 0 on success, -1 on error */ +/** + * hash_add - adds data to the hashtable + * @hash: storage hash table + * @compare: callback to determine if 2 hash elements are identical + * @choose: callback calculating the hash index + * @data: data passed to the aforementioned callbacks as argument + * @data_node: to be added element + * + * Returns 0 on success, 1 if the element already is in the hash + * and -1 on error. + */ + static inline int hash_add(struct hashtable_t *hash, hashdata_compare_cb compare, hashdata_choose_cb choose, const void *data, struct hlist_node *data_node) { - int index; + uint32_t index; + int ret = -1; struct hlist_head *head; struct hlist_node *node; spinlock_t *list_lock; /* spinlock to protect write access */ if (!hash) - goto err; + goto out; index = choose(data, hash->size); head = &hash->table[index]; @@ -99,6 +111,7 @@ static inline int hash_add(struct hashtable_t *hash, if (!compare(node, data)) continue; + ret = 1; goto err_unlock; } rcu_read_unlock(); @@ -108,12 +121,13 @@ static inline int hash_add(struct hashtable_t *hash, hlist_add_head_rcu(data_node, head); spin_unlock_bh(list_lock); - return 0; + ret = 0; + goto out; err_unlock: rcu_read_unlock(); -err: - return -1; +out: + return ret; } /* removes data from hash, if found. returns pointer do data on success, so you @@ -124,7 +138,7 @@ static inline void *hash_remove(struct hashtable_t *hash, hashdata_compare_cb compare, hashdata_choose_cb choose, void *data) { - size_t index; + uint32_t index; struct hlist_node *node; struct hlist_head *head; void *data_save = NULL; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index ac3520e057c0..d9c1e7bb7fbf 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -136,10 +136,9 @@ static ssize_t bat_socket_read(struct file *file, char __user *buf, spin_unlock_bh(&socket_client->lock); - error = __copy_to_user(buf, &socket_packet->icmp_packet, - socket_packet->icmp_len); + packet_len = min(count, socket_packet->icmp_len); + error = copy_to_user(buf, &socket_packet->icmp_packet, packet_len); - packet_len = socket_packet->icmp_len; kfree(socket_packet); if (error) @@ -187,12 +186,7 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, skb_reserve(skb, sizeof(struct ethhdr)); icmp_packet = (struct icmp_packet_rr *)skb_put(skb, packet_len); - if (!access_ok(VERIFY_READ, buff, packet_len)) { - len = -EFAULT; - goto free_skb; - } - - if (__copy_from_user(icmp_packet, buff, packet_len)) { + if (copy_from_user(icmp_packet, buff, packet_len)) { len = -EFAULT; goto free_skb; } @@ -217,7 +211,7 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, if (icmp_packet->version != COMPAT_VERSION) { icmp_packet->msg_type = PARAMETER_PROBLEM; - icmp_packet->ttl = COMPAT_VERSION; + icmp_packet->version = COMPAT_VERSION; bat_socket_add_packet(socket_client, icmp_packet, packet_len); goto free_skb; } diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index b0f9068ade57..fb87bdc2ce9b 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -107,7 +107,7 @@ int mesh_init(struct net_device *soft_iface) if (tt_init(bat_priv) < 1) goto err; - tt_local_add(soft_iface, soft_iface->dev_addr); + tt_local_add(soft_iface, soft_iface->dev_addr, NULL_IFINDEX); if (vis_init(bat_priv) < 1) goto err; @@ -117,8 +117,6 @@ int mesh_init(struct net_device *soft_iface) goto end; err: - pr_err("Unable to allocate memory for mesh information structures: " - "out of mem ?\n"); mesh_free(soft_iface); return -1; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index a6df61a6933b..86354e06eb48 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -28,7 +28,7 @@ #define DRIVER_DEVICE "batman-adv" #ifndef SOURCE_VERSION -#define SOURCE_VERSION "2011.3.0" +#define SOURCE_VERSION "2012.0.0" #endif /* B.A.T.M.A.N. parameters */ @@ -44,7 +44,7 @@ #define PURGE_TIMEOUT 200 #define TT_LOCAL_TIMEOUT 3600 /* in seconds */ #define TT_CLIENT_ROAM_TIMEOUT 600 -/* sliding packet range of received originator messages in squence numbers +/* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) */ #define TQ_LOCAL_WINDOW_SIZE 64 #define TT_REQUEST_TIMEOUT 3 /* seconds we have to keep pending tt_req */ @@ -62,6 +62,8 @@ #define NO_FLAGS 0 +#define NULL_IFINDEX 0 /* dummy ifindex used to avoid iface checks */ + #define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) #define LOG_BUF_LEN 8192 /* has to be a power of 2 */ @@ -133,7 +135,7 @@ enum dbg_level { #include <linux/mutex.h> /* mutex */ #include <linux/module.h> /* needed by all modules */ #include <linux/netdevice.h> /* netdevice */ -#include <linux/etherdevice.h> /* ethernet address classifaction */ +#include <linux/etherdevice.h> /* ethernet address classification */ #include <linux/if_ether.h> /* ethernet header */ #include <linux/poll.h> /* poll_table */ #include <linux/kthread.h> /* kernel threads */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index f3c3f620d195..0bc2045a2f2e 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -164,7 +164,7 @@ void originator_free(struct bat_priv *bat_priv) struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct orig_node *orig_node; - int i; + uint32_t i; if (!hash) return; @@ -252,7 +252,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, const uint8_t *addr) hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig, orig_node, &orig_node->hash_entry); - if (hash_added < 0) + if (hash_added != 0) goto free_bcast_own_sum; return orig_node; @@ -336,8 +336,7 @@ static bool purge_orig_node(struct bat_priv *bat_priv, } else { if (purge_orig_neighbors(bat_priv, orig_node, &best_neigh_node)) { - update_routes(bat_priv, orig_node, - best_neigh_node); + update_route(bat_priv, orig_node, best_neigh_node); } } @@ -351,7 +350,7 @@ static void _purge_orig(struct bat_priv *bat_priv) struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct orig_node *orig_node; - int i; + uint32_t i; if (!hash) return; @@ -414,7 +413,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) int batman_count = 0; int last_seen_secs; int last_seen_msecs; - int i, ret = 0; + uint32_t i; + int ret = 0; primary_if = primary_if_get_selected(bat_priv); @@ -493,10 +493,8 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) data_ptr = kmalloc(max_if_num * sizeof(unsigned long) * NUM_WORDS, GFP_ATOMIC); - if (!data_ptr) { - pr_err("Can't resize orig: out of memory\n"); + if (!data_ptr) return -1; - } memcpy(data_ptr, orig_node->bcast_own, (max_if_num - 1) * sizeof(unsigned long) * NUM_WORDS); @@ -504,10 +502,8 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) orig_node->bcast_own = data_ptr; data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); - if (!data_ptr) { - pr_err("Can't resize orig: out of memory\n"); + if (!data_ptr) return -1; - } memcpy(data_ptr, orig_node->bcast_own_sum, (max_if_num - 1) * sizeof(uint8_t)); @@ -524,7 +520,8 @@ int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num) struct hlist_node *node; struct hlist_head *head; struct orig_node *orig_node; - int i, ret; + uint32_t i; + int ret; /* resize all orig nodes because orig_node->bcast_own(_sum) depend on * if_num */ @@ -562,10 +559,8 @@ static int orig_node_del_if(struct orig_node *orig_node, chunk_size = sizeof(unsigned long) * NUM_WORDS; data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC); - if (!data_ptr) { - pr_err("Can't resize orig: out of memory\n"); + if (!data_ptr) return -1; - } /* copy first part */ memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size); @@ -583,10 +578,8 @@ free_bcast_own: goto free_own_sum; data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); - if (!data_ptr) { - pr_err("Can't resize orig: out of memory\n"); + if (!data_ptr) return -1; - } memcpy(data_ptr, orig_node->bcast_own_sum, del_if_num * sizeof(uint8_t)); @@ -610,7 +603,8 @@ int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num) struct hlist_head *head; struct hard_iface *hard_iface_tmp; struct orig_node *orig_node; - int i, ret; + uint32_t i; + int ret; /* resize all orig nodes because orig_node->bcast_own(_sum) depend on * if_num */ diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index cfc1f60a96a1..67765ffef731 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -42,7 +42,7 @@ int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num); /* hashfunction to choose an entry in a hash table of given size */ /* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ -static inline int choose_orig(const void *data, int32_t size) +static inline uint32_t choose_orig(const void *data, uint32_t size) { const unsigned char *key = data; uint32_t hash = 0; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index b76b4be10b92..4d9e54c57a36 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -25,14 +25,14 @@ #define ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */ enum bat_packettype { - BAT_PACKET = 0x01, - BAT_ICMP = 0x02, - BAT_UNICAST = 0x03, - BAT_BCAST = 0x04, - BAT_VIS = 0x05, + BAT_OGM = 0x01, + BAT_ICMP = 0x02, + BAT_UNICAST = 0x03, + BAT_BCAST = 0x04, + BAT_VIS = 0x05, BAT_UNICAST_FRAG = 0x06, - BAT_TT_QUERY = 0x07, - BAT_ROAM_ADV = 0x08 + BAT_TT_QUERY = 0x07, + BAT_ROAM_ADV = 0x08 }; /* this file is included by batctl which needs these defines */ @@ -84,12 +84,13 @@ enum tt_query_flags { enum tt_client_flags { TT_CLIENT_DEL = 1 << 0, TT_CLIENT_ROAM = 1 << 1, + TT_CLIENT_WIFI = 1 << 2, TT_CLIENT_NOPURGE = 1 << 8, TT_CLIENT_NEW = 1 << 9, TT_CLIENT_PENDING = 1 << 10 }; -struct batman_packet { +struct batman_ogm_packet { uint8_t packet_type; uint8_t version; /* batman version field */ uint8_t ttl; @@ -104,7 +105,7 @@ struct batman_packet { uint16_t tt_crc; } __packed; -#define BAT_PACKET_LEN sizeof(struct batman_packet) +#define BATMAN_OGM_LEN sizeof(struct batman_ogm_packet) struct icmp_packet { uint8_t packet_type; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 0f32c818874d..773e606f9702 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -22,18 +22,14 @@ #include "main.h" #include "routing.h" #include "send.h" -#include "hash.h" #include "soft-interface.h" #include "hard-interface.h" #include "icmp_socket.h" #include "translation-table.h" #include "originator.h" -#include "ring_buffer.h" #include "vis.h" -#include "aggregation.h" -#include "gateway_common.h" -#include "gateway_client.h" #include "unicast.h" +#include "bat_ogm.h" void slide_own_bcast_window(struct hard_iface *hard_iface) { @@ -43,7 +39,7 @@ void slide_own_bcast_window(struct hard_iface *hard_iface) struct hlist_head *head; struct orig_node *orig_node; unsigned long *word; - int i; + uint32_t i; size_t word_index; for (i = 0; i < hash->size; i++) { @@ -64,69 +60,9 @@ void slide_own_bcast_window(struct hard_iface *hard_iface) } } -static void update_transtable(struct bat_priv *bat_priv, - struct orig_node *orig_node, - const unsigned char *tt_buff, - uint8_t tt_num_changes, uint8_t ttvn, - uint16_t tt_crc) -{ - uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); - bool full_table = true; - - /* the ttvn increased by one -> we can apply the attached changes */ - if (ttvn - orig_ttvn == 1) { - /* the OGM could not contain the changes because they were too - * many to fit in one frame or because they have already been - * sent TT_OGM_APPEND_MAX times. In this case send a tt - * request */ - if (!tt_num_changes) { - full_table = false; - goto request_table; - } - - tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, - (struct tt_change *)tt_buff); - - /* Even if we received the crc into the OGM, we prefer - * to recompute it to spot any possible inconsistency - * in the global table */ - orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); - - /* The ttvn alone is not enough to guarantee consistency - * because a single value could repesent different states - * (due to the wrap around). Thus a node has to check whether - * the resulting table (after applying the changes) is still - * consistent or not. E.g. a node could disconnect while its - * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case - * checking the CRC value is mandatory to detect the - * inconsistency */ - if (orig_node->tt_crc != tt_crc) - goto request_table; - - /* Roaming phase is over: tables are in sync again. I can - * unset the flag */ - orig_node->tt_poss_change = false; - } else { - /* if we missed more than one change or our tables are not - * in sync anymore -> request fresh tt data */ - if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) { -request_table: - bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. " - "Need to retrieve the correct information " - "(ttvn: %u last_ttvn: %u crc: %u last_crc: " - "%u num_changes: %u)\n", orig_node->orig, ttvn, - orig_ttvn, tt_crc, orig_node->tt_crc, - tt_num_changes); - send_tt_request(bat_priv, orig_node, ttvn, tt_crc, - full_table); - return; - } - } -} - -static void update_route(struct bat_priv *bat_priv, - struct orig_node *orig_node, - struct neigh_node *neigh_node) +static void _update_route(struct bat_priv *bat_priv, + struct orig_node *orig_node, + struct neigh_node *neigh_node) { struct neigh_node *curr_router; @@ -170,8 +106,8 @@ static void update_route(struct bat_priv *bat_priv, neigh_node_free_ref(curr_router); } -void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node) +void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node, + struct neigh_node *neigh_node) { struct neigh_node *router = NULL; @@ -181,116 +117,13 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, router = orig_node_get_router(orig_node); if (router != neigh_node) - update_route(bat_priv, orig_node, neigh_node); + _update_route(bat_priv, orig_node, neigh_node); out: if (router) neigh_node_free_ref(router); } -static int is_bidirectional_neigh(struct orig_node *orig_node, - struct orig_node *orig_neigh_node, - struct batman_packet *batman_packet, - struct hard_iface *if_incoming) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct neigh_node *neigh_node = NULL, *tmp_neigh_node; - struct hlist_node *node; - uint8_t total_count; - uint8_t orig_eq_count, neigh_rq_count, tq_own; - int tq_asym_penalty, ret = 0; - - /* find corresponding one hop neighbor */ - rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, - &orig_neigh_node->neigh_list, list) { - - if (!compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) - continue; - - if (tmp_neigh_node->if_incoming != if_incoming) - continue; - - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) - continue; - - neigh_node = tmp_neigh_node; - break; - } - rcu_read_unlock(); - - if (!neigh_node) - neigh_node = create_neighbor(orig_neigh_node, - orig_neigh_node, - orig_neigh_node->orig, - if_incoming); - - if (!neigh_node) - goto out; - - /* if orig_node is direct neighbour update neigh_node last_valid */ - if (orig_node == orig_neigh_node) - neigh_node->last_valid = jiffies; - - orig_node->last_valid = jiffies; - - /* find packet count of corresponding one hop neighbor */ - spin_lock_bh(&orig_node->ogm_cnt_lock); - orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num]; - neigh_rq_count = neigh_node->real_packet_count; - spin_unlock_bh(&orig_node->ogm_cnt_lock); - - /* pay attention to not get a value bigger than 100 % */ - total_count = (orig_eq_count > neigh_rq_count ? - neigh_rq_count : orig_eq_count); - - /* if we have too few packets (too less data) we set tq_own to zero */ - /* if we receive too few packets it is not considered bidirectional */ - if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) || - (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) - tq_own = 0; - else - /* neigh_node->real_packet_count is never zero as we - * only purge old information when getting new - * information */ - tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count; - - /* - * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does - * affect the nearly-symmetric links only a little, but - * punishes asymmetric links more. This will give a value - * between 0 and TQ_MAX_VALUE - */ - tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE * - (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * - (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * - (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) / - (TQ_LOCAL_WINDOW_SIZE * - TQ_LOCAL_WINDOW_SIZE * - TQ_LOCAL_WINDOW_SIZE); - - batman_packet->tq = ((batman_packet->tq * tq_own * tq_asym_penalty) / - (TQ_MAX_VALUE * TQ_MAX_VALUE)); - - bat_dbg(DBG_BATMAN, bat_priv, - "bidirectional: " - "orig = %-15pM neigh = %-15pM => own_bcast = %2i, " - "real recv = %2i, local tq: %3i, asym_penalty: %3i, " - "total tq: %3i\n", - orig_node->orig, orig_neigh_node->orig, total_count, - neigh_rq_count, tq_own, tq_asym_penalty, batman_packet->tq); - - /* if link has the minimum required transmission quality - * consider it bidirectional */ - if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) - ret = 1; - -out: - if (neigh_node) - neigh_node_free_ref(neigh_node); - return ret; -} - /* caller must hold the neigh_list_lock */ void bonding_candidate_del(struct orig_node *orig_node, struct neigh_node *neigh_node) @@ -308,8 +141,8 @@ out: return; } -static void bonding_candidate_add(struct orig_node *orig_node, - struct neigh_node *neigh_node) +void bonding_candidate_add(struct orig_node *orig_node, + struct neigh_node *neigh_node) { struct hlist_node *node; struct neigh_node *tmp_neigh_node, *router = NULL; @@ -379,162 +212,23 @@ out: } /* copy primary address for bonding */ -static void bonding_save_primary(const struct orig_node *orig_node, - struct orig_node *orig_neigh_node, - const struct batman_packet *batman_packet) +void bonding_save_primary(const struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + const struct batman_ogm_packet *batman_ogm_packet) { - if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) + if (!(batman_ogm_packet->flags & PRIMARIES_FIRST_HOP)) return; memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN); } -static void update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, - const struct ethhdr *ethhdr, - const struct batman_packet *batman_packet, - struct hard_iface *if_incoming, - const unsigned char *tt_buff, int is_duplicate) -{ - struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; - struct neigh_node *router = NULL; - struct orig_node *orig_node_tmp; - struct hlist_node *node; - uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; - - bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " - "Searching and updating originator entry of received packet\n"); - - rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, - &orig_node->neigh_list, list) { - if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && - (tmp_neigh_node->if_incoming == if_incoming) && - atomic_inc_not_zero(&tmp_neigh_node->refcount)) { - if (neigh_node) - neigh_node_free_ref(neigh_node); - neigh_node = tmp_neigh_node; - continue; - } - - if (is_duplicate) - continue; - - spin_lock_bh(&tmp_neigh_node->tq_lock); - ring_buffer_set(tmp_neigh_node->tq_recv, - &tmp_neigh_node->tq_index, 0); - tmp_neigh_node->tq_avg = - ring_buffer_avg(tmp_neigh_node->tq_recv); - spin_unlock_bh(&tmp_neigh_node->tq_lock); - } - - if (!neigh_node) { - struct orig_node *orig_tmp; - - orig_tmp = get_orig_node(bat_priv, ethhdr->h_source); - if (!orig_tmp) - goto unlock; - - neigh_node = create_neighbor(orig_node, orig_tmp, - ethhdr->h_source, if_incoming); - - orig_node_free_ref(orig_tmp); - if (!neigh_node) - goto unlock; - } else - bat_dbg(DBG_BATMAN, bat_priv, - "Updating existing last-hop neighbor of originator\n"); - - rcu_read_unlock(); - - orig_node->flags = batman_packet->flags; - neigh_node->last_valid = jiffies; - - spin_lock_bh(&neigh_node->tq_lock); - ring_buffer_set(neigh_node->tq_recv, - &neigh_node->tq_index, - batman_packet->tq); - neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv); - spin_unlock_bh(&neigh_node->tq_lock); - - if (!is_duplicate) { - orig_node->last_ttl = batman_packet->ttl; - neigh_node->last_ttl = batman_packet->ttl; - } - - bonding_candidate_add(orig_node, neigh_node); - - /* if this neighbor already is our next hop there is nothing - * to change */ - router = orig_node_get_router(orig_node); - if (router == neigh_node) - goto update_tt; - - /* if this neighbor does not offer a better TQ we won't consider it */ - if (router && (router->tq_avg > neigh_node->tq_avg)) - goto update_tt; - - /* if the TQ is the same and the link not more symetric we - * won't consider it either */ - if (router && (neigh_node->tq_avg == router->tq_avg)) { - orig_node_tmp = router->orig_node; - spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_orig = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; - spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - - orig_node_tmp = neigh_node->orig_node; - spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_neigh = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; - spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - - if (bcast_own_sum_orig >= bcast_own_sum_neigh) - goto update_tt; - } - - update_routes(bat_priv, orig_node, neigh_node); - -update_tt: - /* I have to check for transtable changes only if the OGM has been - * sent through a primary interface */ - if (((batman_packet->orig != ethhdr->h_source) && - (batman_packet->ttl > 2)) || - (batman_packet->flags & PRIMARIES_FIRST_HOP)) - update_transtable(bat_priv, orig_node, tt_buff, - batman_packet->tt_num_changes, - batman_packet->ttvn, - batman_packet->tt_crc); - - if (orig_node->gw_flags != batman_packet->gw_flags) - gw_node_update(bat_priv, orig_node, batman_packet->gw_flags); - - orig_node->gw_flags = batman_packet->gw_flags; - - /* restart gateway selection if fast or late switching was enabled */ - if ((orig_node->gw_flags) && - (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) && - (atomic_read(&bat_priv->gw_sel_class) > 2)) - gw_check_election(bat_priv, orig_node); - - goto out; - -unlock: - rcu_read_unlock(); -out: - if (neigh_node) - neigh_node_free_ref(neigh_node); - if (router) - neigh_node_free_ref(router); -} - /* checks whether the host restarted and is in the protection time. * returns: * 0 if the packet is to be accepted * 1 if the packet is to be ignored. */ -static int window_protected(struct bat_priv *bat_priv, - int32_t seq_num_diff, - unsigned long *last_reset) +int window_protected(struct bat_priv *bat_priv, int32_t seq_num_diff, + unsigned long *last_reset) { if ((seq_num_diff <= -TQ_LOCAL_WINDOW_SIZE) || (seq_num_diff >= EXPECTED_SEQNO_RANGE)) { @@ -552,330 +246,12 @@ static int window_protected(struct bat_priv *bat_priv, return 0; } -/* processes a batman packet for all interfaces, adjusts the sequence number and - * finds out whether it is a duplicate. - * returns: - * 1 the packet is a duplicate - * 0 the packet has not yet been received - * -1 the packet is old and has been received while the seqno window - * was protected. Caller should drop it. - */ -static int count_real_packets(const struct ethhdr *ethhdr, - const struct batman_packet *batman_packet, - const struct hard_iface *if_incoming) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct orig_node *orig_node; - struct neigh_node *tmp_neigh_node; - struct hlist_node *node; - int is_duplicate = 0; - int32_t seq_diff; - int need_update = 0; - int set_mark, ret = -1; - - orig_node = get_orig_node(bat_priv, batman_packet->orig); - if (!orig_node) - return 0; - - spin_lock_bh(&orig_node->ogm_cnt_lock); - seq_diff = batman_packet->seqno - orig_node->last_real_seqno; - - /* signalize caller that the packet is to be dropped. */ - if (window_protected(bat_priv, seq_diff, - &orig_node->batman_seqno_reset)) - goto out; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, - &orig_node->neigh_list, list) { - - is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, - orig_node->last_real_seqno, - batman_packet->seqno); - - if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && - (tmp_neigh_node->if_incoming == if_incoming)) - set_mark = 1; - else - set_mark = 0; - - /* if the window moved, set the update flag. */ - need_update |= bit_get_packet(bat_priv, - tmp_neigh_node->real_bits, - seq_diff, set_mark); - - tmp_neigh_node->real_packet_count = - bit_packet_count(tmp_neigh_node->real_bits); - } - rcu_read_unlock(); - - if (need_update) { - bat_dbg(DBG_BATMAN, bat_priv, - "updating last_seqno: old %d, new %d\n", - orig_node->last_real_seqno, batman_packet->seqno); - orig_node->last_real_seqno = batman_packet->seqno; - } - - ret = is_duplicate; - -out: - spin_unlock_bh(&orig_node->ogm_cnt_lock); - orig_node_free_ref(orig_node); - return ret; -} - -void receive_bat_packet(const struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - const unsigned char *tt_buff, - struct hard_iface *if_incoming) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct hard_iface *hard_iface; - struct orig_node *orig_neigh_node, *orig_node; - struct neigh_node *router = NULL, *router_router = NULL; - struct neigh_node *orig_neigh_router = NULL; - int has_directlink_flag; - int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - int is_broadcast = 0, is_bidirectional, is_single_hop_neigh; - int is_duplicate; - uint32_t if_incoming_seqno; - - /* Silently drop when the batman packet is actually not a - * correct packet. - * - * This might happen if a packet is padded (e.g. Ethernet has a - * minimum frame length of 64 byte) and the aggregation interprets - * it as an additional length. - * - * TODO: A more sane solution would be to have a bit in the - * batman_packet to detect whether the packet is the last - * packet in an aggregation. Here we expect that the padding - * is always zero (or not 0x01) - */ - if (batman_packet->packet_type != BAT_PACKET) - return; - - /* could be changed by schedule_own_packet() */ - if_incoming_seqno = atomic_read(&if_incoming->seqno); - - has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0); - - is_single_hop_neigh = (compare_eth(ethhdr->h_source, - batman_packet->orig) ? 1 : 0); - - bat_dbg(DBG_BATMAN, bat_priv, - "Received BATMAN packet via NB: %pM, IF: %s [%pM] " - "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, " - "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", - ethhdr->h_source, if_incoming->net_dev->name, - if_incoming->net_dev->dev_addr, batman_packet->orig, - batman_packet->prev_sender, batman_packet->seqno, - batman_packet->ttvn, batman_packet->tt_crc, - batman_packet->tt_num_changes, batman_packet->tq, - batman_packet->ttl, batman_packet->version, - has_directlink_flag); - - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &hardif_list, list) { - if (hard_iface->if_status != IF_ACTIVE) - continue; - - if (hard_iface->soft_iface != if_incoming->soft_iface) - continue; - - if (compare_eth(ethhdr->h_source, - hard_iface->net_dev->dev_addr)) - is_my_addr = 1; - - if (compare_eth(batman_packet->orig, - hard_iface->net_dev->dev_addr)) - is_my_orig = 1; - - if (compare_eth(batman_packet->prev_sender, - hard_iface->net_dev->dev_addr)) - is_my_oldorig = 1; - - if (is_broadcast_ether_addr(ethhdr->h_source)) - is_broadcast = 1; - } - rcu_read_unlock(); - - if (batman_packet->version != COMPAT_VERSION) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: incompatible batman version (%i)\n", - batman_packet->version); - return; - } - - if (is_my_addr) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: received my own broadcast (sender: %pM" - ")\n", - ethhdr->h_source); - return; - } - - if (is_broadcast) { - bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " - "ignoring all packets with broadcast source addr (sender: %pM" - ")\n", ethhdr->h_source); - return; - } - - if (is_my_orig) { - unsigned long *word; - int offset; - - orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source); - if (!orig_neigh_node) - return; - - /* neighbor has to indicate direct link and it has to - * come via the corresponding interface */ - /* save packet seqno for bidirectional check */ - if (has_directlink_flag && - compare_eth(if_incoming->net_dev->dev_addr, - batman_packet->orig)) { - offset = if_incoming->if_num * NUM_WORDS; - - spin_lock_bh(&orig_neigh_node->ogm_cnt_lock); - word = &(orig_neigh_node->bcast_own[offset]); - bit_mark(word, - if_incoming_seqno - batman_packet->seqno - 2); - orig_neigh_node->bcast_own_sum[if_incoming->if_num] = - bit_packet_count(word); - spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); - } - - bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " - "originator packet from myself (via neighbor)\n"); - orig_node_free_ref(orig_neigh_node); - return; - } - - if (is_my_oldorig) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: ignoring all rebroadcast echos (sender: " - "%pM)\n", ethhdr->h_source); - return; - } - - orig_node = get_orig_node(bat_priv, batman_packet->orig); - if (!orig_node) - return; - - is_duplicate = count_real_packets(ethhdr, batman_packet, if_incoming); - - if (is_duplicate == -1) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: packet within seqno protection time " - "(sender: %pM)\n", ethhdr->h_source); - goto out; - } - - if (batman_packet->tq == 0) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: originator packet with tq equal 0\n"); - goto out; - } - - router = orig_node_get_router(orig_node); - if (router) - router_router = orig_node_get_router(router->orig_node); - - /* avoid temporary routing loops */ - if (router && router_router && - (compare_eth(router->addr, batman_packet->prev_sender)) && - !(compare_eth(batman_packet->orig, batman_packet->prev_sender)) && - (compare_eth(router->addr, router_router->addr))) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: ignoring all rebroadcast packets that " - "may make me loop (sender: %pM)\n", ethhdr->h_source); - goto out; - } - - /* if sender is a direct neighbor the sender mac equals - * originator mac */ - orig_neigh_node = (is_single_hop_neigh ? - orig_node : - get_orig_node(bat_priv, ethhdr->h_source)); - if (!orig_neigh_node) - goto out; - - orig_neigh_router = orig_node_get_router(orig_neigh_node); - - /* drop packet if sender is not a direct neighbor and if we - * don't route towards it */ - if (!is_single_hop_neigh && (!orig_neigh_router)) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: OGM via unknown neighbor!\n"); - goto out_neigh; - } - - is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node, - batman_packet, if_incoming); - - bonding_save_primary(orig_node, orig_neigh_node, batman_packet); - - /* update ranking if it is not a duplicate or has the same - * seqno and similar ttl as the non-duplicate */ - if (is_bidirectional && - (!is_duplicate || - ((orig_node->last_real_seqno == batman_packet->seqno) && - (orig_node->last_ttl - 3 <= batman_packet->ttl)))) - update_orig(bat_priv, orig_node, ethhdr, batman_packet, - if_incoming, tt_buff, is_duplicate); - - /* is single hop (direct) neighbor */ - if (is_single_hop_neigh) { - - /* mark direct link on incoming interface */ - schedule_forward_packet(orig_node, ethhdr, batman_packet, - 1, if_incoming); - - bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " - "rebroadcast neighbor packet with direct link flag\n"); - goto out_neigh; - } - - /* multihop originator */ - if (!is_bidirectional) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: not received via bidirectional link\n"); - goto out_neigh; - } - - if (is_duplicate) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: duplicate packet received\n"); - goto out_neigh; - } - - bat_dbg(DBG_BATMAN, bat_priv, - "Forwarding packet: rebroadcast originator packet\n"); - schedule_forward_packet(orig_node, ethhdr, batman_packet, - 0, if_incoming); - -out_neigh: - if ((orig_neigh_node) && (!is_single_hop_neigh)) - orig_node_free_ref(orig_neigh_node); -out: - if (router) - neigh_node_free_ref(router); - if (router_router) - neigh_node_free_ref(router_router); - if (orig_neigh_router) - neigh_node_free_ref(orig_neigh_router); - - orig_node_free_ref(orig_node); -} - -int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface) +int recv_bat_ogm_packet(struct sk_buff *skb, struct hard_iface *hard_iface) { struct ethhdr *ethhdr; /* drop packet if it has not necessary minimum size */ - if (unlikely(!pskb_may_pull(skb, sizeof(struct batman_packet)))) + if (unlikely(!pskb_may_pull(skb, BATMAN_OGM_LEN))) return NET_RX_DROP; ethhdr = (struct ethhdr *)skb_mac_header(skb); @@ -898,10 +274,7 @@ int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface) ethhdr = (struct ethhdr *)skb_mac_header(skb); - receive_aggr_bat_packet(ethhdr, - skb->data, - skb_headlen(skb), - hard_iface); + bat_ogm_receive(ethhdr, skb->data, skb_headlen(skb), hard_iface); kfree_skb(skb); return NET_RX_SUCCESS; @@ -1205,6 +578,7 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct tt_query_packet *tt_query; + uint16_t tt_len; struct ethhdr *ethhdr; /* drop packet if it has not necessary minimum size */ @@ -1243,13 +617,21 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) } break; case TT_RESPONSE: - /* packet needs to be linearised to access the TT changes */ - if (skb_linearize(skb) < 0) - goto out; + if (is_my_mac(tt_query->dst)) { + /* packet needs to be linearized to access the TT + * changes */ + if (skb_linearize(skb) < 0) + goto out; + + tt_len = tt_query->tt_data * sizeof(struct tt_change); + + /* Ensure we have all the claimed data */ + if (unlikely(skb_headlen(skb) < + sizeof(struct tt_query_packet) + tt_len)) + goto out; - if (is_my_mac(tt_query->dst)) handle_tt_response(bat_priv, tt_query); - else { + } else { bat_dbg(DBG_TT, bat_priv, "Routing TT_RESPONSE to %pM [%c]\n", tt_query->dst, @@ -1300,7 +682,7 @@ int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if) roam_adv_packet->client); tt_global_add(bat_priv, orig_node, roam_adv_packet->client, - atomic_read(&orig_node->last_ttvn) + 1, true); + atomic_read(&orig_node->last_ttvn) + 1, true, false); /* Roaming phase starts: I have new information but the ttvn has not * been incremented yet. This flag will make me check all the incoming @@ -1536,7 +918,7 @@ static int check_unicast_ttvn(struct bat_priv *bat_priv, ethhdr = (struct ethhdr *)(skb->data + sizeof(struct unicast_packet)); - orig_node = transtable_search(bat_priv, ethhdr->h_dest); + orig_node = transtable_search(bat_priv, NULL, ethhdr->h_dest); if (!orig_node) { if (!is_my_client(bat_priv, ethhdr->h_dest)) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index fb14e9579b19..7aaee0fb0fdc 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -23,19 +23,15 @@ #define _NET_BATMAN_ADV_ROUTING_H_ void slide_own_bcast_window(struct hard_iface *hard_iface); -void receive_bat_packet(const struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - const unsigned char *tt_buff, - struct hard_iface *if_incoming); -void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node); +void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node, + struct neigh_node *neigh_node); int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if); -int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_bat_ogm_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if); int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if); struct neigh_node *find_router(struct bat_priv *bat_priv, @@ -43,5 +39,12 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, const struct hard_iface *recv_if); void bonding_candidate_del(struct orig_node *orig_node, struct neigh_node *neigh_node); +void bonding_candidate_add(struct orig_node *orig_node, + struct neigh_node *neigh_node); +void bonding_save_primary(const struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + const struct batman_ogm_packet *batman_ogm_packet); +int window_protected(struct bat_priv *bat_priv, int32_t seq_num_diff, + unsigned long *last_reset); #endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 58d14472068c..8a684eb738ad 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -26,33 +26,12 @@ #include "soft-interface.h" #include "hard-interface.h" #include "vis.h" -#include "aggregation.h" #include "gateway_common.h" #include "originator.h" +#include "bat_ogm.h" static void send_outstanding_bcast_packet(struct work_struct *work); -/* apply hop penalty for a normal link */ -static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv) -{ - int hop_penalty = atomic_read(&bat_priv->hop_penalty); - return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE); -} - -/* when do we schedule our own packet to be sent */ -static unsigned long own_send_time(const struct bat_priv *bat_priv) -{ - return jiffies + msecs_to_jiffies( - atomic_read(&bat_priv->orig_interval) - - JITTER + (random32() % 2*JITTER)); -} - -/* when do we schedule a forwarded packet to be sent */ -static unsigned long forward_send_time(void) -{ - return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); -} - /* send out an already prepared packet to the given address via the * specified batman interface */ int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, @@ -99,141 +78,17 @@ send_skb_err: return NET_XMIT_DROP; } -/* Send a packet to a given interface */ -static void send_packet_to_if(struct forw_packet *forw_packet, - struct hard_iface *hard_iface) -{ - struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - char *fwd_str; - uint8_t packet_num; - int16_t buff_pos; - struct batman_packet *batman_packet; - struct sk_buff *skb; - - if (hard_iface->if_status != IF_ACTIVE) - return; - - packet_num = 0; - buff_pos = 0; - batman_packet = (struct batman_packet *)forw_packet->skb->data; - - /* adjust all flags and log packets */ - while (aggregated_packet(buff_pos, - forw_packet->packet_len, - batman_packet->tt_num_changes)) { - - /* we might have aggregated direct link packets with an - * ordinary base packet */ - if ((forw_packet->direct_link_flags & (1 << packet_num)) && - (forw_packet->if_incoming == hard_iface)) - batman_packet->flags |= DIRECTLINK; - else - batman_packet->flags &= ~DIRECTLINK; - - fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ? - "Sending own" : - "Forwarding")); - bat_dbg(DBG_BATMAN, bat_priv, - "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d," - " IDF %s, hvn %d) on interface %s [%pM]\n", - fwd_str, (packet_num > 0 ? "aggregated " : ""), - batman_packet->orig, ntohl(batman_packet->seqno), - batman_packet->tq, batman_packet->ttl, - (batman_packet->flags & DIRECTLINK ? - "on" : "off"), - batman_packet->ttvn, hard_iface->net_dev->name, - hard_iface->net_dev->dev_addr); - - buff_pos += sizeof(*batman_packet) + - tt_len(batman_packet->tt_num_changes); - packet_num++; - batman_packet = (struct batman_packet *) - (forw_packet->skb->data + buff_pos); - } - - /* create clone because function is called more than once */ - skb = skb_clone(forw_packet->skb, GFP_ATOMIC); - if (skb) - send_skb_packet(skb, hard_iface, broadcast_addr); -} - -/* send a batman packet */ -static void send_packet(struct forw_packet *forw_packet) -{ - struct hard_iface *hard_iface; - struct net_device *soft_iface; - struct bat_priv *bat_priv; - struct hard_iface *primary_if = NULL; - struct batman_packet *batman_packet = - (struct batman_packet *)(forw_packet->skb->data); - int directlink = (batman_packet->flags & DIRECTLINK ? 1 : 0); - - if (!forw_packet->if_incoming) { - pr_err("Error - can't forward packet: incoming iface not " - "specified\n"); - goto out; - } - - soft_iface = forw_packet->if_incoming->soft_iface; - bat_priv = netdev_priv(soft_iface); - - if (forw_packet->if_incoming->if_status != IF_ACTIVE) - goto out; - - primary_if = primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - /* multihomed peer assumed */ - /* non-primary OGMs are only broadcasted on their interface */ - if ((directlink && (batman_packet->ttl == 1)) || - (forw_packet->own && (forw_packet->if_incoming != primary_if))) { - - /* FIXME: what about aggregated packets ? */ - bat_dbg(DBG_BATMAN, bat_priv, - "%s packet (originator %pM, seqno %d, TTL %d) " - "on interface %s [%pM]\n", - (forw_packet->own ? "Sending own" : "Forwarding"), - batman_packet->orig, ntohl(batman_packet->seqno), - batman_packet->ttl, - forw_packet->if_incoming->net_dev->name, - forw_packet->if_incoming->net_dev->dev_addr); - - /* skb is only used once and than forw_packet is free'd */ - send_skb_packet(forw_packet->skb, forw_packet->if_incoming, - broadcast_addr); - forw_packet->skb = NULL; - - goto out; - } - - /* broadcast on every interface */ - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) - continue; - - send_packet_to_if(forw_packet, hard_iface); - } - rcu_read_unlock(); - -out: - if (primary_if) - hardif_free_ref(primary_if); -} - static void realloc_packet_buffer(struct hard_iface *hard_iface, - int new_len) + int new_len) { unsigned char *new_buff; - struct batman_packet *batman_packet; new_buff = kmalloc(new_len, GFP_ATOMIC); /* keep old buffer if kmalloc should fail */ if (new_buff) { memcpy(new_buff, hard_iface->packet_buff, - sizeof(*batman_packet)); + BATMAN_OGM_LEN); kfree(hard_iface->packet_buff); hard_iface->packet_buff = new_buff; @@ -242,60 +97,48 @@ static void realloc_packet_buffer(struct hard_iface *hard_iface, } /* when calling this function (hard_iface == primary_if) has to be true */ -static void prepare_packet_buffer(struct bat_priv *bat_priv, +static int prepare_packet_buffer(struct bat_priv *bat_priv, struct hard_iface *hard_iface) { int new_len; - struct batman_packet *batman_packet; - new_len = BAT_PACKET_LEN + + new_len = BATMAN_OGM_LEN + tt_len((uint8_t)atomic_read(&bat_priv->tt_local_changes)); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented */ if (new_len > hard_iface->soft_iface->mtu) - new_len = BAT_PACKET_LEN; + new_len = BATMAN_OGM_LEN; realloc_packet_buffer(hard_iface, new_len); - batman_packet = (struct batman_packet *)hard_iface->packet_buff; atomic_set(&bat_priv->tt_crc, tt_local_crc(bat_priv)); /* reset the sending counter */ atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX); - batman_packet->tt_num_changes = tt_changes_fill_buffer(bat_priv, - hard_iface->packet_buff + BAT_PACKET_LEN, - hard_iface->packet_len - BAT_PACKET_LEN); - + return tt_changes_fill_buffer(bat_priv, + hard_iface->packet_buff + BATMAN_OGM_LEN, + hard_iface->packet_len - BATMAN_OGM_LEN); } -static void reset_packet_buffer(struct bat_priv *bat_priv, - struct hard_iface *hard_iface) +static int reset_packet_buffer(struct bat_priv *bat_priv, + struct hard_iface *hard_iface) { - struct batman_packet *batman_packet; - - realloc_packet_buffer(hard_iface, BAT_PACKET_LEN); - - batman_packet = (struct batman_packet *)hard_iface->packet_buff; - batman_packet->tt_num_changes = 0; + realloc_packet_buffer(hard_iface, BATMAN_OGM_LEN); + return 0; } -void schedule_own_packet(struct hard_iface *hard_iface) +void schedule_bat_ogm(struct hard_iface *hard_iface) { struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct hard_iface *primary_if; - unsigned long send_time; - struct batman_packet *batman_packet; - int vis_server; + int tt_num_changes = -1; if ((hard_iface->if_status == IF_NOT_IN_USE) || (hard_iface->if_status == IF_TO_BE_REMOVED)) return; - vis_server = atomic_read(&bat_priv->vis_mode); - primary_if = primary_if_get_selected(bat_priv); - /** * the interface gets activated here to avoid race conditions between * the moment of activating the interface in @@ -306,124 +149,26 @@ void schedule_own_packet(struct hard_iface *hard_iface) if (hard_iface->if_status == IF_TO_BE_ACTIVATED) hard_iface->if_status = IF_ACTIVE; + primary_if = primary_if_get_selected(bat_priv); + if (hard_iface == primary_if) { /* if at least one change happened */ if (atomic_read(&bat_priv->tt_local_changes) > 0) { tt_commit_changes(bat_priv); - prepare_packet_buffer(bat_priv, hard_iface); + tt_num_changes = prepare_packet_buffer(bat_priv, + hard_iface); } - /* if the changes have been sent enough times */ + /* if the changes have been sent often enough */ if (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt)) - reset_packet_buffer(bat_priv, hard_iface); + tt_num_changes = reset_packet_buffer(bat_priv, + hard_iface); } - /** - * NOTE: packet_buff might just have been re-allocated in - * prepare_packet_buffer() or in reset_packet_buffer() - */ - batman_packet = (struct batman_packet *)hard_iface->packet_buff; - - /* change sequence number to network order */ - batman_packet->seqno = - htonl((uint32_t)atomic_read(&hard_iface->seqno)); - - batman_packet->ttvn = atomic_read(&bat_priv->ttvn); - batman_packet->tt_crc = htons((uint16_t)atomic_read(&bat_priv->tt_crc)); - - if (vis_server == VIS_TYPE_SERVER_SYNC) - batman_packet->flags |= VIS_SERVER; - else - batman_packet->flags &= ~VIS_SERVER; - - if ((hard_iface == primary_if) && - (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)) - batman_packet->gw_flags = - (uint8_t)atomic_read(&bat_priv->gw_bandwidth); - else - batman_packet->gw_flags = NO_FLAGS; - - atomic_inc(&hard_iface->seqno); - - slide_own_bcast_window(hard_iface); - send_time = own_send_time(bat_priv); - add_bat_packet_to_list(bat_priv, - hard_iface->packet_buff, - hard_iface->packet_len, - hard_iface, 1, send_time); - if (primary_if) hardif_free_ref(primary_if); -} - -void schedule_forward_packet(struct orig_node *orig_node, - const struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - int directlink, - struct hard_iface *if_incoming) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct neigh_node *router; - uint8_t in_tq, in_ttl, tq_avg = 0; - unsigned long send_time; - uint8_t tt_num_changes; - - if (batman_packet->ttl <= 1) { - bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n"); - return; - } - - router = orig_node_get_router(orig_node); - - in_tq = batman_packet->tq; - in_ttl = batman_packet->ttl; - tt_num_changes = batman_packet->tt_num_changes; - - batman_packet->ttl--; - memcpy(batman_packet->prev_sender, ethhdr->h_source, ETH_ALEN); - - /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast - * of our best tq value */ - if (router && router->tq_avg != 0) { - - /* rebroadcast ogm of best ranking neighbor as is */ - if (!compare_eth(router->addr, ethhdr->h_source)) { - batman_packet->tq = router->tq_avg; - - if (router->last_ttl) - batman_packet->ttl = router->last_ttl - 1; - } - - tq_avg = router->tq_avg; - } - - if (router) - neigh_node_free_ref(router); - - /* apply hop penalty */ - batman_packet->tq = hop_penalty(batman_packet->tq, bat_priv); - - bat_dbg(DBG_BATMAN, bat_priv, - "Forwarding packet: tq_orig: %i, tq_avg: %i, " - "tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n", - in_tq, tq_avg, batman_packet->tq, in_ttl - 1, - batman_packet->ttl); - - batman_packet->seqno = htonl(batman_packet->seqno); - batman_packet->tt_crc = htons(batman_packet->tt_crc); - - /* switch of primaries first hop flag when forwarding */ - batman_packet->flags &= ~PRIMARIES_FIRST_HOP; - if (directlink) - batman_packet->flags |= DIRECTLINK; - else - batman_packet->flags &= ~DIRECTLINK; - send_time = forward_send_time(); - add_bat_packet_to_list(bat_priv, - (unsigned char *)batman_packet, - sizeof(*batman_packet) + tt_len(tt_num_changes), - if_incoming, 0, send_time); + bat_ogm_schedule(hard_iface, tt_num_changes); } static void forw_packet_free(struct forw_packet *forw_packet) @@ -454,7 +199,7 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv, } /* add a broadcast packet to the queue and setup timers. broadcast packets - * are sent multiple times to increase probability for beeing received. + * are sent multiple times to increase probability for being received. * * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on * errors. @@ -557,7 +302,7 @@ out: atomic_inc(&bat_priv->bcast_queue_left); } -void send_outstanding_bat_packet(struct work_struct *work) +void send_outstanding_bat_ogm_packet(struct work_struct *work) { struct delayed_work *delayed_work = container_of(work, struct delayed_work, work); @@ -573,7 +318,7 @@ void send_outstanding_bat_packet(struct work_struct *work) if (atomic_read(&bat_priv->mesh_state) == MESH_DEACTIVATING) goto out; - send_packet(forw_packet); + bat_ogm_emit(forw_packet); /** * we have to have at least one packet in the queue @@ -581,7 +326,7 @@ void send_outstanding_bat_packet(struct work_struct *work) * shutting down */ if (forw_packet->own) - schedule_own_packet(forw_packet->if_incoming); + schedule_bat_ogm(forw_packet->if_incoming); out: /* don't count own packet */ @@ -612,7 +357,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, &bat_priv->forw_bcast_list, list) { /** - * if purge_outstanding_packets() was called with an argmument + * if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ if ((hard_iface) && @@ -641,7 +386,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, &bat_priv->forw_bat_list, list) { /** - * if purge_outstanding_packets() was called with an argmument + * if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ if ((hard_iface) && diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 1f2d1e877663..c8ca3ef7385b 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -24,15 +24,10 @@ int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, const uint8_t *dst_addr); -void schedule_own_packet(struct hard_iface *hard_iface); -void schedule_forward_packet(struct orig_node *orig_node, - const struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - int directlink, - struct hard_iface *if_outgoing); +void schedule_bat_ogm(struct hard_iface *hard_iface); int add_bcast_packet_to_list(struct bat_priv *bat_priv, const struct sk_buff *skb, unsigned long delay); -void send_outstanding_bat_packet(struct work_struct *work); +void send_outstanding_bat_ogm_packet(struct work_struct *work); void purge_outstanding_packets(struct bat_priv *bat_priv, const struct hard_iface *hard_iface); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 05dd35114a27..987c75a775f9 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -445,30 +445,31 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev, { struct bat_priv *bat_priv = netdev_priv(dev); struct ethhdr *ethhdr = (struct ethhdr *)skb->data; - struct batman_packet *batman_packet; + struct batman_ogm_packet *batman_ogm_packet; struct softif_neigh *softif_neigh = NULL; struct hard_iface *primary_if = NULL; struct softif_neigh *curr_softif_neigh = NULL; if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) - batman_packet = (struct batman_packet *) + batman_ogm_packet = (struct batman_ogm_packet *) (skb->data + ETH_HLEN + VLAN_HLEN); else - batman_packet = (struct batman_packet *)(skb->data + ETH_HLEN); + batman_ogm_packet = (struct batman_ogm_packet *) + (skb->data + ETH_HLEN); - if (batman_packet->version != COMPAT_VERSION) + if (batman_ogm_packet->version != COMPAT_VERSION) goto out; - if (batman_packet->packet_type != BAT_PACKET) + if (batman_ogm_packet->packet_type != BAT_OGM) goto out; - if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) + if (!(batman_ogm_packet->flags & PRIMARIES_FIRST_HOP)) goto out; - if (is_my_mac(batman_packet->orig)) + if (is_my_mac(batman_ogm_packet->orig)) goto out; - softif_neigh = softif_neigh_get(bat_priv, batman_packet->orig, vid); + softif_neigh = softif_neigh_get(bat_priv, batman_ogm_packet->orig, vid); if (!softif_neigh) goto out; @@ -532,11 +533,11 @@ static int interface_set_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - /* only modify transtable if it has been initialised before */ + /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) { tt_local_remove(bat_priv, dev->dev_addr, "mac address changed", false); - tt_local_add(dev, addr->sa_data); + tt_local_add(dev, addr->sa_data, NULL_IFINDEX); } memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); @@ -562,10 +563,10 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) struct bcast_packet *bcast_packet; struct vlan_ethhdr *vhdr; struct softif_neigh *curr_softif_neigh = NULL; - struct orig_node *orig_node = NULL; + unsigned int header_len = 0; int data_len = skb->len, ret; short vid = -1; - bool do_bcast; + bool do_bcast = false; if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) goto dropped; @@ -595,18 +596,30 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) goto dropped; /* Register the client MAC in the transtable */ - tt_local_add(soft_iface, ethhdr->h_source); - - orig_node = transtable_search(bat_priv, ethhdr->h_dest); - do_bcast = is_multicast_ether_addr(ethhdr->h_dest); - if (do_bcast || (orig_node && orig_node->gw_flags)) { - ret = gw_is_target(bat_priv, skb, orig_node); - - if (ret < 0) - goto dropped; - - if (ret) - do_bcast = false; + tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); + + if (is_multicast_ether_addr(ethhdr->h_dest)) { + do_bcast = true; + + switch (atomic_read(&bat_priv->gw_mode)) { + case GW_MODE_SERVER: + /* gateway servers should not send dhcp + * requests into the mesh */ + ret = gw_is_dhcp_target(skb, &header_len); + if (ret) + goto dropped; + break; + case GW_MODE_CLIENT: + /* gateway clients should send dhcp requests + * via unicast to their gateway */ + ret = gw_is_dhcp_target(skb, &header_len); + if (ret) + do_bcast = false; + break; + case GW_MODE_OFF: + default: + break; + } } /* ethernet packet should be broadcasted */ @@ -642,6 +655,12 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) /* unicast packet */ } else { + if (atomic_read(&bat_priv->gw_mode) != GW_MODE_OFF) { + ret = gw_out_of_range(bat_priv, skb, ethhdr); + if (ret) + goto dropped; + } + ret = unicast_send_skb(skb, bat_priv); if (ret != 0) goto dropped_freed; @@ -660,8 +679,6 @@ end: softif_neigh_free_ref(curr_softif_neigh); if (primary_if) hardif_free_ref(primary_if); - if (orig_node) - orig_node_free_ref(orig_node); return NETDEV_TX_OK; } @@ -739,6 +756,9 @@ void interface_rx(struct net_device *soft_iface, soft_iface->last_rx = jiffies; + if (is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest)) + goto dropped; + netif_rx(skb); goto out; @@ -796,10 +816,8 @@ struct net_device *softif_create(const char *name) soft_iface = alloc_netdev(sizeof(*bat_priv), name, interface_setup); - if (!soft_iface) { - pr_err("Unable to allocate the batman interface: %s\n", name); + if (!soft_iface) goto out; - } ret = register_netdevice(soft_iface); if (ret < 0) { @@ -812,6 +830,7 @@ struct net_device *softif_create(const char *name) atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); + atomic_set(&bat_priv->ap_isolation, 0); atomic_set(&bat_priv->vis_mode, VIS_TYPE_CLIENT_UPDATE); atomic_set(&bat_priv->gw_mode, GW_MODE_OFF); atomic_set(&bat_priv->gw_sel_class, 20); @@ -855,7 +874,7 @@ unreg_debugfs: unreg_sysfs: sysfs_del_meshif(soft_iface); unreg_soft_iface: - unregister_netdev(soft_iface); + unregister_netdevice(soft_iface); return NULL; free_soft_iface: diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index fb6931d00cd7..ab8dea8b0b2e 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -36,18 +36,9 @@ static void _tt_global_del(struct bat_priv *bat_priv, static void tt_purge(struct work_struct *work); /* returns 1 if they are the same mac addr */ -static int compare_ltt(const struct hlist_node *node, const void *data2) +static int compare_tt(const struct hlist_node *node, const void *data2) { - const void *data1 = container_of(node, struct tt_local_entry, - hash_entry); - - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); -} - -/* returns 1 if they are the same mac addr */ -static int compare_gtt(const struct hlist_node *node, const void *data2) -{ - const void *data1 = container_of(node, struct tt_global_entry, + const void *data1 = container_of(node, struct tt_common_entry, hash_entry); return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); @@ -60,14 +51,13 @@ static void tt_start_timer(struct bat_priv *bat_priv) msecs_to_jiffies(5000)); } -static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv, - const void *data) +static struct tt_common_entry *tt_hash_find(struct hashtable_t *hash, + const void *data) { - struct hashtable_t *hash = bat_priv->tt_local_hash; struct hlist_head *head; struct hlist_node *node; - struct tt_local_entry *tt_local_entry, *tt_local_entry_tmp = NULL; - int index; + struct tt_common_entry *tt_common_entry, *tt_common_entry_tmp = NULL; + uint32_t index; if (!hash) return NULL; @@ -76,51 +66,46 @@ static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv, head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_local_entry, node, head, hash_entry) { - if (!compare_eth(tt_local_entry, data)) + hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { + if (!compare_eth(tt_common_entry, data)) continue; - if (!atomic_inc_not_zero(&tt_local_entry->refcount)) + if (!atomic_inc_not_zero(&tt_common_entry->refcount)) continue; - tt_local_entry_tmp = tt_local_entry; + tt_common_entry_tmp = tt_common_entry; break; } rcu_read_unlock(); - return tt_local_entry_tmp; + return tt_common_entry_tmp; } -static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv, - const void *data) +static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv, + const void *data) { - struct hashtable_t *hash = bat_priv->tt_global_hash; - struct hlist_head *head; - struct hlist_node *node; - struct tt_global_entry *tt_global_entry; - struct tt_global_entry *tt_global_entry_tmp = NULL; - int index; - - if (!hash) - return NULL; - - index = choose_orig(data, hash->size); - head = &hash->table[index]; + struct tt_common_entry *tt_common_entry; + struct tt_local_entry *tt_local_entry = NULL; - rcu_read_lock(); - hlist_for_each_entry_rcu(tt_global_entry, node, head, hash_entry) { - if (!compare_eth(tt_global_entry, data)) - continue; + tt_common_entry = tt_hash_find(bat_priv->tt_local_hash, data); + if (tt_common_entry) + tt_local_entry = container_of(tt_common_entry, + struct tt_local_entry, common); + return tt_local_entry; +} - if (!atomic_inc_not_zero(&tt_global_entry->refcount)) - continue; +static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv, + const void *data) +{ + struct tt_common_entry *tt_common_entry; + struct tt_global_entry *tt_global_entry = NULL; - tt_global_entry_tmp = tt_global_entry; - break; - } - rcu_read_unlock(); + tt_common_entry = tt_hash_find(bat_priv->tt_global_hash, data); + if (tt_common_entry) + tt_global_entry = container_of(tt_common_entry, + struct tt_global_entry, common); + return tt_global_entry; - return tt_global_entry_tmp; } static bool is_out_of_time(unsigned long starting_time, unsigned long timeout) @@ -133,14 +118,30 @@ static bool is_out_of_time(unsigned long starting_time, unsigned long timeout) static void tt_local_entry_free_ref(struct tt_local_entry *tt_local_entry) { - if (atomic_dec_and_test(&tt_local_entry->refcount)) - kfree_rcu(tt_local_entry, rcu); + if (atomic_dec_and_test(&tt_local_entry->common.refcount)) + kfree_rcu(tt_local_entry, common.rcu); +} + +static void tt_global_entry_free_rcu(struct rcu_head *rcu) +{ + struct tt_common_entry *tt_common_entry; + struct tt_global_entry *tt_global_entry; + + tt_common_entry = container_of(rcu, struct tt_common_entry, rcu); + tt_global_entry = container_of(tt_common_entry, struct tt_global_entry, + common); + + if (tt_global_entry->orig_node) + orig_node_free_ref(tt_global_entry->orig_node); + + kfree(tt_global_entry); } static void tt_global_entry_free_ref(struct tt_global_entry *tt_global_entry) { - if (atomic_dec_and_test(&tt_global_entry->refcount)) - kfree_rcu(tt_global_entry, rcu); + if (atomic_dec_and_test(&tt_global_entry->common.refcount)) + call_rcu(&tt_global_entry->common.rcu, + tt_global_entry_free_rcu); } static void tt_local_event(struct bat_priv *bat_priv, const uint8_t *addr, @@ -183,11 +184,13 @@ static int tt_local_init(struct bat_priv *bat_priv) return 1; } -void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr, + int ifindex) { struct bat_priv *bat_priv = netdev_priv(soft_iface); struct tt_local_entry *tt_local_entry = NULL; struct tt_global_entry *tt_global_entry = NULL; + int hash_added; tt_local_entry = tt_local_hash_find(bat_priv, addr); @@ -204,24 +207,33 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) "Creating new local tt entry: %pM (ttvn: %d)\n", addr, (uint8_t)atomic_read(&bat_priv->ttvn)); - memcpy(tt_local_entry->addr, addr, ETH_ALEN); + memcpy(tt_local_entry->common.addr, addr, ETH_ALEN); + tt_local_entry->common.flags = NO_FLAGS; + if (is_wifi_iface(ifindex)) + tt_local_entry->common.flags |= TT_CLIENT_WIFI; + atomic_set(&tt_local_entry->common.refcount, 2); tt_local_entry->last_seen = jiffies; - tt_local_entry->flags = NO_FLAGS; - atomic_set(&tt_local_entry->refcount, 2); /* the batman interface mac address should never be purged */ if (compare_eth(addr, soft_iface->dev_addr)) - tt_local_entry->flags |= TT_CLIENT_NOPURGE; + tt_local_entry->common.flags |= TT_CLIENT_NOPURGE; + + hash_added = hash_add(bat_priv->tt_local_hash, compare_tt, choose_orig, + &tt_local_entry->common, + &tt_local_entry->common.hash_entry); - tt_local_event(bat_priv, addr, tt_local_entry->flags); + if (unlikely(hash_added != 0)) { + /* remove the reference for the hash */ + tt_local_entry_free_ref(tt_local_entry); + goto out; + } + + tt_local_event(bat_priv, addr, tt_local_entry->common.flags); /* The local entry has to be marked as NEW to avoid to send it in * a full table response going out before the next ttvn increment * (consistency check) */ - tt_local_entry->flags |= TT_CLIENT_NEW; - - hash_add(bat_priv->tt_local_hash, compare_ltt, choose_orig, - tt_local_entry, &tt_local_entry->hash_entry); + tt_local_entry->common.flags |= TT_CLIENT_NEW; /* remove address from global hash if present */ tt_global_entry = tt_global_hash_find(bat_priv, addr); @@ -230,10 +242,11 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) if (tt_global_entry) { /* This node is probably going to update its tt table */ tt_global_entry->orig_node->tt_poss_change = true; - /* The global entry has to be marked as PENDING and has to be + /* The global entry has to be marked as ROAMING and has to be * kept for consistency purpose */ - tt_global_entry->flags |= TT_CLIENT_PENDING; - send_roam_adv(bat_priv, tt_global_entry->addr, + tt_global_entry->common.flags |= TT_CLIENT_ROAM; + tt_global_entry->roam_at = jiffies; + send_roam_adv(bat_priv, tt_global_entry->common.addr, tt_global_entry->orig_node); } out: @@ -295,13 +308,12 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct bat_priv *bat_priv = netdev_priv(net_dev); struct hashtable_t *hash = bat_priv->tt_local_hash; - struct tt_local_entry *tt_local_entry; + struct tt_common_entry *tt_common_entry; struct hard_iface *primary_if; struct hlist_node *node; struct hlist_head *head; - size_t buf_size, pos; - char *buff; - int i, ret = 0; + uint32_t i; + int ret = 0; primary_if = primary_if_get_selected(bat_priv); if (!primary_if) { @@ -322,40 +334,27 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) "announced via TT (TTVN: %u):\n", net_dev->name, (uint8_t)atomic_read(&bat_priv->ttvn)); - buf_size = 1; - /* Estimate length for: " * xx:xx:xx:xx:xx:xx\n" */ - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - __hlist_for_each_rcu(node, head) - buf_size += 21; - rcu_read_unlock(); - } - - buff = kmalloc(buf_size, GFP_ATOMIC); - if (!buff) { - ret = -ENOMEM; - goto out; - } - - buff[0] = '\0'; - pos = 0; - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_local_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { - pos += snprintf(buff + pos, 22, " * %pM\n", - tt_local_entry->addr); + seq_printf(seq, " * %pM [%c%c%c%c%c]\n", + tt_common_entry->addr, + (tt_common_entry->flags & + TT_CLIENT_ROAM ? 'R' : '.'), + (tt_common_entry->flags & + TT_CLIENT_NOPURGE ? 'P' : '.'), + (tt_common_entry->flags & + TT_CLIENT_NEW ? 'N' : '.'), + (tt_common_entry->flags & + TT_CLIENT_PENDING ? 'X' : '.'), + (tt_common_entry->flags & + TT_CLIENT_WIFI ? 'W' : '.')); } rcu_read_unlock(); } - - seq_printf(seq, "%s", buff); - kfree(buff); out: if (primary_if) hardif_free_ref(primary_if); @@ -366,13 +365,13 @@ static void tt_local_set_pending(struct bat_priv *bat_priv, struct tt_local_entry *tt_local_entry, uint16_t flags) { - tt_local_event(bat_priv, tt_local_entry->addr, - tt_local_entry->flags | flags); + tt_local_event(bat_priv, tt_local_entry->common.addr, + tt_local_entry->common.flags | flags); - /* The local client has to be merked as "pending to be removed" but has - * to be kept in the table in order to send it in an full tables + /* The local client has to be marked as "pending to be removed" but has + * to be kept in the table in order to send it in a full table * response issued before the net ttvn increment (consistency check) */ - tt_local_entry->flags |= TT_CLIENT_PENDING; + tt_local_entry->common.flags |= TT_CLIENT_PENDING; } void tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr, @@ -388,7 +387,7 @@ void tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr, (roaming ? TT_CLIENT_ROAM : NO_FLAGS)); bat_dbg(DBG_TT, bat_priv, "Local tt entry (%pM) pending to be removed: " - "%s\n", tt_local_entry->addr, message); + "%s\n", tt_local_entry->common.addr, message); out: if (tt_local_entry) tt_local_entry_free_ref(tt_local_entry); @@ -398,23 +397,27 @@ static void tt_local_purge(struct bat_priv *bat_priv) { struct hashtable_t *hash = bat_priv->tt_local_hash; struct tt_local_entry *tt_local_entry; + struct tt_common_entry *tt_common_entry; struct hlist_node *node, *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ - int i; + uint32_t i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_local_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, hash_entry) { - if (tt_local_entry->flags & TT_CLIENT_NOPURGE) + tt_local_entry = container_of(tt_common_entry, + struct tt_local_entry, + common); + if (tt_local_entry->common.flags & TT_CLIENT_NOPURGE) continue; /* entry already marked for deletion */ - if (tt_local_entry->flags & TT_CLIENT_PENDING) + if (tt_local_entry->common.flags & TT_CLIENT_PENDING) continue; if (!is_out_of_time(tt_local_entry->last_seen, @@ -425,7 +428,7 @@ static void tt_local_purge(struct bat_priv *bat_priv) TT_CLIENT_DEL); bat_dbg(DBG_TT, bat_priv, "Local tt entry (%pM) " "pending to be removed: timed out\n", - tt_local_entry->addr); + tt_local_entry->common.addr); } spin_unlock_bh(list_lock); } @@ -436,10 +439,11 @@ static void tt_local_table_free(struct bat_priv *bat_priv) { struct hashtable_t *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ + struct tt_common_entry *tt_common_entry; struct tt_local_entry *tt_local_entry; struct hlist_node *node, *node_tmp; struct hlist_head *head; - int i; + uint32_t i; if (!bat_priv->tt_local_hash) return; @@ -451,9 +455,12 @@ static void tt_local_table_free(struct bat_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_local_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, hash_entry) { hlist_del_rcu(node); + tt_local_entry = container_of(tt_common_entry, + struct tt_local_entry, + common); tt_local_entry_free_ref(tt_local_entry); } spin_unlock_bh(list_lock); @@ -495,11 +502,13 @@ static void tt_changes_list_free(struct bat_priv *bat_priv) /* caller must hold orig_node refcount */ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, - const unsigned char *tt_addr, uint8_t ttvn, bool roaming) + const unsigned char *tt_addr, uint8_t ttvn, bool roaming, + bool wifi) { struct tt_global_entry *tt_global_entry; struct orig_node *orig_node_tmp; int ret = 0; + int hash_added; tt_global_entry = tt_global_hash_find(bat_priv, tt_addr); @@ -510,18 +519,24 @@ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, if (!tt_global_entry) goto out; - memcpy(tt_global_entry->addr, tt_addr, ETH_ALEN); + memcpy(tt_global_entry->common.addr, tt_addr, ETH_ALEN); + tt_global_entry->common.flags = NO_FLAGS; + atomic_set(&tt_global_entry->common.refcount, 2); /* Assign the new orig_node */ atomic_inc(&orig_node->refcount); tt_global_entry->orig_node = orig_node; tt_global_entry->ttvn = ttvn; - tt_global_entry->flags = NO_FLAGS; tt_global_entry->roam_at = 0; - atomic_set(&tt_global_entry->refcount, 2); - hash_add(bat_priv->tt_global_hash, compare_gtt, - choose_orig, tt_global_entry, - &tt_global_entry->hash_entry); + hash_added = hash_add(bat_priv->tt_global_hash, compare_tt, + choose_orig, &tt_global_entry->common, + &tt_global_entry->common.hash_entry); + + if (unlikely(hash_added != 0)) { + /* remove the reference for the hash */ + tt_global_entry_free_ref(tt_global_entry); + goto out_remove; + } atomic_inc(&orig_node->tt_size); } else { if (tt_global_entry->orig_node != orig_node) { @@ -532,17 +547,21 @@ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, orig_node_free_ref(orig_node_tmp); atomic_inc(&orig_node->tt_size); } + tt_global_entry->common.flags = NO_FLAGS; tt_global_entry->ttvn = ttvn; - tt_global_entry->flags = NO_FLAGS; tt_global_entry->roam_at = 0; } + if (wifi) + tt_global_entry->common.flags |= TT_CLIENT_WIFI; + bat_dbg(DBG_TT, bat_priv, "Creating new global tt entry: %pM (via %pM)\n", - tt_global_entry->addr, orig_node->orig); + tt_global_entry->common.addr, orig_node->orig); +out_remove: /* remove address from local hash if present */ - tt_local_remove(bat_priv, tt_global_entry->addr, + tt_local_remove(bat_priv, tt_global_entry->common.addr, "global tt received", roaming); ret = 1; out: @@ -556,13 +575,13 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct bat_priv *bat_priv = netdev_priv(net_dev); struct hashtable_t *hash = bat_priv->tt_global_hash; + struct tt_common_entry *tt_common_entry; struct tt_global_entry *tt_global_entry; struct hard_iface *primary_if; struct hlist_node *node; struct hlist_head *head; - size_t buf_size, pos; - char *buff; - int i, ret = 0; + uint32_t i; + int ret = 0; primary_if = primary_if_get_selected(bat_priv); if (!primary_if) { @@ -582,50 +601,35 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - seq_printf(seq, " %-13s %s %-15s %s\n", - "Client", "(TTVN)", "Originator", "(Curr TTVN)"); - - buf_size = 1; - /* Estimate length for: " * xx:xx:xx:xx:xx:xx (ttvn) via - * xx:xx:xx:xx:xx:xx (cur_ttvn)\n"*/ - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - __hlist_for_each_rcu(node, head) - buf_size += 59; - rcu_read_unlock(); - } - - buff = kmalloc(buf_size, GFP_ATOMIC); - if (!buff) { - ret = -ENOMEM; - goto out; - } - - buff[0] = '\0'; - pos = 0; + seq_printf(seq, " %-13s %s %-15s %s %s\n", + "Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags"); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_global_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { - pos += snprintf(buff + pos, 61, - " * %pM (%3u) via %pM (%3u)\n", - tt_global_entry->addr, + tt_global_entry = container_of(tt_common_entry, + struct tt_global_entry, + common); + seq_printf(seq, " * %pM (%3u) via %pM (%3u) " + "[%c%c%c]\n", + tt_global_entry->common.addr, tt_global_entry->ttvn, tt_global_entry->orig_node->orig, (uint8_t) atomic_read( &tt_global_entry->orig_node-> - last_ttvn)); + last_ttvn), + (tt_global_entry->common.flags & + TT_CLIENT_ROAM ? 'R' : '.'), + (tt_global_entry->common.flags & + TT_CLIENT_PENDING ? 'X' : '.'), + (tt_global_entry->common.flags & + TT_CLIENT_WIFI ? 'W' : '.')); } rcu_read_unlock(); } - - seq_printf(seq, "%s", buff); - kfree(buff); out: if (primary_if) hardif_free_ref(primary_if); @@ -641,13 +645,13 @@ static void _tt_global_del(struct bat_priv *bat_priv, bat_dbg(DBG_TT, bat_priv, "Deleting global tt entry %pM (via %pM): %s\n", - tt_global_entry->addr, tt_global_entry->orig_node->orig, + tt_global_entry->common.addr, tt_global_entry->orig_node->orig, message); atomic_dec(&tt_global_entry->orig_node->tt_size); - hash_remove(bat_priv->tt_global_hash, compare_gtt, choose_orig, - tt_global_entry->addr); + hash_remove(bat_priv->tt_global_hash, compare_tt, choose_orig, + tt_global_entry->common.addr); out: if (tt_global_entry) tt_global_entry_free_ref(tt_global_entry); @@ -658,6 +662,7 @@ void tt_global_del(struct bat_priv *bat_priv, const char *message, bool roaming) { struct tt_global_entry *tt_global_entry = NULL; + struct tt_local_entry *tt_local_entry = NULL; tt_global_entry = tt_global_hash_find(bat_priv, addr); if (!tt_global_entry) @@ -665,40 +670,62 @@ void tt_global_del(struct bat_priv *bat_priv, if (tt_global_entry->orig_node == orig_node) { if (roaming) { - tt_global_entry->flags |= TT_CLIENT_ROAM; - tt_global_entry->roam_at = jiffies; - goto out; + /* if we are deleting a global entry due to a roam + * event, there are two possibilities: + * 1) the client roamed from node A to node B => we mark + * it with TT_CLIENT_ROAM, we start a timer and we + * wait for node B to claim it. In case of timeout + * the entry is purged. + * 2) the client roamed to us => we can directly delete + * the global entry, since it is useless now. */ + tt_local_entry = tt_local_hash_find(bat_priv, + tt_global_entry->common.addr); + if (!tt_local_entry) { + tt_global_entry->common.flags |= TT_CLIENT_ROAM; + tt_global_entry->roam_at = jiffies; + goto out; + } } _tt_global_del(bat_priv, tt_global_entry, message); } out: if (tt_global_entry) tt_global_entry_free_ref(tt_global_entry); + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); } void tt_global_del_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, const char *message) { struct tt_global_entry *tt_global_entry; - int i; + struct tt_common_entry *tt_common_entry; + uint32_t i; struct hashtable_t *hash = bat_priv->tt_global_hash; struct hlist_node *node, *safe; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ + if (!hash) + return; + for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_global_entry, node, safe, + hlist_for_each_entry_safe(tt_common_entry, node, safe, head, hash_entry) { + tt_global_entry = container_of(tt_common_entry, + struct tt_global_entry, + common); if (tt_global_entry->orig_node == orig_node) { bat_dbg(DBG_TT, bat_priv, "Deleting global tt entry %pM " - "(via %pM): originator time out\n", - tt_global_entry->addr, - tt_global_entry->orig_node->orig); + "(via %pM): %s\n", + tt_global_entry->common.addr, + tt_global_entry->orig_node->orig, + message); hlist_del_rcu(node); tt_global_entry_free_ref(tt_global_entry); } @@ -711,20 +738,24 @@ void tt_global_del_orig(struct bat_priv *bat_priv, static void tt_global_roam_purge(struct bat_priv *bat_priv) { struct hashtable_t *hash = bat_priv->tt_global_hash; + struct tt_common_entry *tt_common_entry; struct tt_global_entry *tt_global_entry; struct hlist_node *node, *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ - int i; + uint32_t i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_global_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, hash_entry) { - if (!(tt_global_entry->flags & TT_CLIENT_ROAM)) + tt_global_entry = container_of(tt_common_entry, + struct tt_global_entry, + common); + if (!(tt_global_entry->common.flags & TT_CLIENT_ROAM)) continue; if (!is_out_of_time(tt_global_entry->roam_at, TT_CLIENT_ROAM_TIMEOUT * 1000)) @@ -732,7 +763,7 @@ static void tt_global_roam_purge(struct bat_priv *bat_priv) bat_dbg(DBG_TT, bat_priv, "Deleting global " "tt entry (%pM): Roaming timeout\n", - tt_global_entry->addr); + tt_global_entry->common.addr); atomic_dec(&tt_global_entry->orig_node->tt_size); hlist_del_rcu(node); tt_global_entry_free_ref(tt_global_entry); @@ -746,10 +777,11 @@ static void tt_global_table_free(struct bat_priv *bat_priv) { struct hashtable_t *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ + struct tt_common_entry *tt_common_entry; struct tt_global_entry *tt_global_entry; struct hlist_node *node, *node_tmp; struct hlist_head *head; - int i; + uint32_t i; if (!bat_priv->tt_global_hash) return; @@ -761,9 +793,12 @@ static void tt_global_table_free(struct bat_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_global_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, hash_entry) { hlist_del_rcu(node); + tt_global_entry = container_of(tt_common_entry, + struct tt_global_entry, + common); tt_global_entry_free_ref(tt_global_entry); } spin_unlock_bh(list_lock); @@ -774,30 +809,56 @@ static void tt_global_table_free(struct bat_priv *bat_priv) bat_priv->tt_global_hash = NULL; } +static bool _is_ap_isolated(struct tt_local_entry *tt_local_entry, + struct tt_global_entry *tt_global_entry) +{ + bool ret = false; + + if (tt_local_entry->common.flags & TT_CLIENT_WIFI && + tt_global_entry->common.flags & TT_CLIENT_WIFI) + ret = true; + + return ret; +} + struct orig_node *transtable_search(struct bat_priv *bat_priv, - const uint8_t *addr) + const uint8_t *src, const uint8_t *addr) { - struct tt_global_entry *tt_global_entry; + struct tt_local_entry *tt_local_entry = NULL; + struct tt_global_entry *tt_global_entry = NULL; struct orig_node *orig_node = NULL; - tt_global_entry = tt_global_hash_find(bat_priv, addr); + if (src && atomic_read(&bat_priv->ap_isolation)) { + tt_local_entry = tt_local_hash_find(bat_priv, src); + if (!tt_local_entry) + goto out; + } + tt_global_entry = tt_global_hash_find(bat_priv, addr); if (!tt_global_entry) goto out; + /* check whether the clients should not communicate due to AP + * isolation */ + if (tt_local_entry && _is_ap_isolated(tt_local_entry, tt_global_entry)) + goto out; + if (!atomic_inc_not_zero(&tt_global_entry->orig_node->refcount)) - goto free_tt; + goto out; /* A global client marked as PENDING has already moved from that * originator */ - if (tt_global_entry->flags & TT_CLIENT_PENDING) - goto free_tt; + if (tt_global_entry->common.flags & TT_CLIENT_PENDING) + goto out; orig_node = tt_global_entry->orig_node; -free_tt: - tt_global_entry_free_ref(tt_global_entry); out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); + return orig_node; } @@ -806,29 +867,34 @@ uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node) { uint16_t total = 0, total_one; struct hashtable_t *hash = bat_priv->tt_global_hash; + struct tt_common_entry *tt_common_entry; struct tt_global_entry *tt_global_entry; struct hlist_node *node; struct hlist_head *head; - int i, j; + uint32_t i; + int j; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_global_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { + tt_global_entry = container_of(tt_common_entry, + struct tt_global_entry, + common); if (compare_eth(tt_global_entry->orig_node, orig_node)) { /* Roaming clients are in the global table for * consistency only. They don't have to be * taken into account while computing the * global crc */ - if (tt_global_entry->flags & TT_CLIENT_ROAM) + if (tt_common_entry->flags & TT_CLIENT_ROAM) continue; total_one = 0; for (j = 0; j < ETH_ALEN; j++) total_one = crc16_byte(total_one, - tt_global_entry->addr[j]); + tt_common_entry->addr[j]); total ^= total_one; } } @@ -843,25 +909,26 @@ uint16_t tt_local_crc(struct bat_priv *bat_priv) { uint16_t total = 0, total_one; struct hashtable_t *hash = bat_priv->tt_local_hash; - struct tt_local_entry *tt_local_entry; + struct tt_common_entry *tt_common_entry; struct hlist_node *node; struct hlist_head *head; - int i, j; + uint32_t i; + int j; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_local_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { /* not yet committed clients have not to be taken into * account while computing the CRC */ - if (tt_local_entry->flags & TT_CLIENT_NEW) + if (tt_common_entry->flags & TT_CLIENT_NEW) continue; total_one = 0; for (j = 0; j < ETH_ALEN; j++) total_one = crc16_byte(total_one, - tt_local_entry->addr[j]); + tt_common_entry->addr[j]); total ^= total_one; } rcu_read_unlock(); @@ -950,21 +1017,25 @@ unlock: /* data_ptr is useless here, but has to be kept to respect the prototype */ static int tt_local_valid_entry(const void *entry_ptr, const void *data_ptr) { - const struct tt_local_entry *tt_local_entry = entry_ptr; + const struct tt_common_entry *tt_common_entry = entry_ptr; - if (tt_local_entry->flags & TT_CLIENT_NEW) + if (tt_common_entry->flags & TT_CLIENT_NEW) return 0; return 1; } static int tt_global_valid_entry(const void *entry_ptr, const void *data_ptr) { - const struct tt_global_entry *tt_global_entry = entry_ptr; + const struct tt_common_entry *tt_common_entry = entry_ptr; + const struct tt_global_entry *tt_global_entry; const struct orig_node *orig_node = data_ptr; - if (tt_global_entry->flags & TT_CLIENT_ROAM) + if (tt_common_entry->flags & TT_CLIENT_ROAM) return 0; + tt_global_entry = container_of(tt_common_entry, struct tt_global_entry, + common); + return (tt_global_entry->orig_node == orig_node); } @@ -975,7 +1046,7 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, const void *), void *cb_data) { - struct tt_local_entry *tt_local_entry; + struct tt_common_entry *tt_common_entry; struct tt_query_packet *tt_response; struct tt_change *tt_change; struct hlist_node *node; @@ -983,7 +1054,7 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, struct sk_buff *skb = NULL; uint16_t tt_tot, tt_count; ssize_t tt_query_size = sizeof(struct tt_query_packet); - int i; + uint32_t i; if (tt_query_size + tt_len > primary_if->soft_iface->mtu) { tt_len = primary_if->soft_iface->mtu - tt_query_size; @@ -999,7 +1070,6 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, tt_response = (struct tt_query_packet *)skb_put(skb, tt_query_size + tt_len); tt_response->ttvn = ttvn; - tt_response->tt_data = htons(tt_tot); tt_change = (struct tt_change *)(skb->data + tt_query_size); tt_count = 0; @@ -1008,15 +1078,16 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_rcu(tt_local_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { if (tt_count == tt_tot) break; - if ((valid_cb) && (!valid_cb(tt_local_entry, cb_data))) + if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data))) continue; - memcpy(tt_change->addr, tt_local_entry->addr, ETH_ALEN); + memcpy(tt_change->addr, tt_common_entry->addr, + ETH_ALEN); tt_change->flags = NO_FLAGS; tt_count++; @@ -1025,12 +1096,17 @@ static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, } rcu_read_unlock(); + /* store in the message the number of entries we have successfully + * copied */ + tt_response->tt_data = htons(tt_count); + out: return skb; } -int send_tt_request(struct bat_priv *bat_priv, struct orig_node *dst_orig_node, - uint8_t ttvn, uint16_t tt_crc, bool full_table) +static int send_tt_request(struct bat_priv *bat_priv, + struct orig_node *dst_orig_node, + uint8_t ttvn, uint16_t tt_crc, bool full_table) { struct sk_buff *skb = NULL; struct tt_query_packet *tt_request; @@ -1118,11 +1194,11 @@ static bool send_other_tt_response(struct bat_priv *bat_priv, (tt_request->flags & TT_FULL_TABLE ? 'F' : '.')); /* Let's get the orig node of the REAL destination */ - req_dst_orig_node = get_orig_node(bat_priv, tt_request->dst); + req_dst_orig_node = orig_hash_find(bat_priv, tt_request->dst); if (!req_dst_orig_node) goto out; - res_dst_orig_node = get_orig_node(bat_priv, tt_request->src); + res_dst_orig_node = orig_hash_find(bat_priv, tt_request->src); if (!res_dst_orig_node) goto out; @@ -1137,12 +1213,12 @@ static bool send_other_tt_response(struct bat_priv *bat_priv, orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_request->ttvn; - /* I have not the requested data */ + /* I don't have the requested data */ if (orig_ttvn != req_ttvn || tt_request->tt_data != req_dst_orig_node->tt_crc) goto out; - /* If it has explicitly been requested the full table */ + /* If the full table has been explicitly requested */ if (tt_request->flags & TT_FULL_TABLE || !req_dst_orig_node->tt_buff) full_table = true; @@ -1248,7 +1324,7 @@ static bool send_my_tt_response(struct bat_priv *bat_priv, my_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); req_ttvn = tt_request->ttvn; - orig_node = get_orig_node(bat_priv, tt_request->src); + orig_node = orig_hash_find(bat_priv, tt_request->src); if (!orig_node) goto out; @@ -1363,7 +1439,9 @@ static void _tt_update_changes(struct bat_priv *bat_priv, (tt_change + i)->flags & TT_CLIENT_ROAM); else if (!tt_global_add(bat_priv, orig_node, - (tt_change + i)->addr, ttvn, false)) + (tt_change + i)->addr, ttvn, false, + (tt_change + i)->flags & + TT_CLIENT_WIFI)) /* In case of problem while storing a * global_entry, we stop the updating * procedure without committing the @@ -1403,9 +1481,10 @@ out: orig_node_free_ref(orig_node); } -void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, - uint16_t tt_num_changes, uint8_t ttvn, - struct tt_change *tt_change) +static void tt_update_changes(struct bat_priv *bat_priv, + struct orig_node *orig_node, + uint16_t tt_num_changes, uint8_t ttvn, + struct tt_change *tt_change) { _tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes, ttvn); @@ -1425,7 +1504,7 @@ bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr) goto out; /* Check if the client has been logically deleted (but is kept for * consistency purpose) */ - if (tt_local_entry->flags & TT_CLIENT_PENDING) + if (tt_local_entry->common.flags & TT_CLIENT_PENDING) goto out; ret = true; out: @@ -1648,43 +1727,53 @@ void tt_free(struct bat_priv *bat_priv) kfree(bat_priv->tt_buff); } -/* This function will reset the specified flags from all the entries in - * the given hash table and will increment num_local_tt for each involved - * entry */ -static void tt_local_reset_flags(struct bat_priv *bat_priv, uint16_t flags) +/* This function will enable or disable the specified flags for all the entries + * in the given hash table and returns the number of modified entries */ +static uint16_t tt_set_flags(struct hashtable_t *hash, uint16_t flags, + bool enable) { - int i; - struct hashtable_t *hash = bat_priv->tt_local_hash; + uint32_t i; + uint16_t changed_num = 0; struct hlist_head *head; struct hlist_node *node; - struct tt_local_entry *tt_local_entry; + struct tt_common_entry *tt_common_entry; if (!hash) - return; + goto out; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_local_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { - tt_local_entry->flags &= ~flags; - atomic_inc(&bat_priv->num_local_tt); + if (enable) { + if ((tt_common_entry->flags & flags) == flags) + continue; + tt_common_entry->flags |= flags; + } else { + if (!(tt_common_entry->flags & flags)) + continue; + tt_common_entry->flags &= ~flags; + } + changed_num++; } rcu_read_unlock(); } - +out: + return changed_num; } /* Purge out all the tt local entries marked with TT_CLIENT_PENDING */ static void tt_local_purge_pending_clients(struct bat_priv *bat_priv) { struct hashtable_t *hash = bat_priv->tt_local_hash; + struct tt_common_entry *tt_common_entry; struct tt_local_entry *tt_local_entry; struct hlist_node *node, *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ - int i; + uint32_t i; if (!hash) return; @@ -1694,16 +1783,19 @@ static void tt_local_purge_pending_clients(struct bat_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_local_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, hash_entry) { - if (!(tt_local_entry->flags & TT_CLIENT_PENDING)) + if (!(tt_common_entry->flags & TT_CLIENT_PENDING)) continue; bat_dbg(DBG_TT, bat_priv, "Deleting local tt entry " - "(%pM): pending\n", tt_local_entry->addr); + "(%pM): pending\n", tt_common_entry->addr); atomic_dec(&bat_priv->num_local_tt); hlist_del_rcu(node); + tt_local_entry = container_of(tt_common_entry, + struct tt_local_entry, + common); tt_local_entry_free_ref(tt_local_entry); } spin_unlock_bh(list_lock); @@ -1713,10 +1805,101 @@ static void tt_local_purge_pending_clients(struct bat_priv *bat_priv) void tt_commit_changes(struct bat_priv *bat_priv) { - tt_local_reset_flags(bat_priv, TT_CLIENT_NEW); + uint16_t changed_num = tt_set_flags(bat_priv->tt_local_hash, + TT_CLIENT_NEW, false); + /* all the reset entries have now to be effectively counted as local + * entries */ + atomic_add(changed_num, &bat_priv->num_local_tt); tt_local_purge_pending_clients(bat_priv); /* Increment the TTVN only once per OGM interval */ atomic_inc(&bat_priv->ttvn); bat_priv->tt_poss_change = false; } + +bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst) +{ + struct tt_local_entry *tt_local_entry = NULL; + struct tt_global_entry *tt_global_entry = NULL; + bool ret = true; + + if (!atomic_read(&bat_priv->ap_isolation)) + return false; + + tt_local_entry = tt_local_hash_find(bat_priv, dst); + if (!tt_local_entry) + goto out; + + tt_global_entry = tt_global_hash_find(bat_priv, src); + if (!tt_global_entry) + goto out; + + if (_is_ap_isolated(tt_local_entry, tt_global_entry)) + goto out; + + ret = false; + +out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); + return ret; +} + +void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes, + uint8_t ttvn, uint16_t tt_crc) +{ + uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + bool full_table = true; + + /* the ttvn increased by one -> we can apply the attached changes */ + if (ttvn - orig_ttvn == 1) { + /* the OGM could not contain the changes due to their size or + * because they have already been sent TT_OGM_APPEND_MAX times. + * In this case send a tt request */ + if (!tt_num_changes) { + full_table = false; + goto request_table; + } + + tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, + (struct tt_change *)tt_buff); + + /* Even if we received the precomputed crc with the OGM, we + * prefer to recompute it to spot any possible inconsistency + * in the global table */ + orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); + + /* The ttvn alone is not enough to guarantee consistency + * because a single value could represent different states + * (due to the wrap around). Thus a node has to check whether + * the resulting table (after applying the changes) is still + * consistent or not. E.g. a node could disconnect while its + * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case + * checking the CRC value is mandatory to detect the + * inconsistency */ + if (orig_node->tt_crc != tt_crc) + goto request_table; + + /* Roaming phase is over: tables are in sync again. I can + * unset the flag */ + orig_node->tt_poss_change = false; + } else { + /* if we missed more than one change or our tables are not + * in sync anymore -> request fresh tt data */ + if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) { +request_table: + bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. " + "Need to retrieve the correct information " + "(ttvn: %u last_ttvn: %u crc: %u last_crc: " + "%u num_changes: %u)\n", orig_node->orig, ttvn, + orig_ttvn, tt_crc, orig_node->tt_crc, + tt_num_changes); + send_tt_request(bat_priv, orig_node, ttvn, tt_crc, + full_table); + return; + } + } +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index d4122cba53b8..30efd49881a3 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -26,15 +26,16 @@ int tt_len(int changes_num); int tt_changes_fill_buffer(struct bat_priv *bat_priv, unsigned char *buff, int buff_len); int tt_init(struct bat_priv *bat_priv); -void tt_local_add(struct net_device *soft_iface, const uint8_t *addr); +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr, + int ifindex); void tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr, const char *message, bool roaming); int tt_local_seq_print_text(struct seq_file *seq, void *offset); void tt_global_add_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *tt_buff, int tt_buff_len); -int tt_global_add(struct bat_priv *bat_priv, - struct orig_node *orig_node, const unsigned char *addr, - uint8_t ttvn, bool roaming); +int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *addr, uint8_t ttvn, bool roaming, + bool wifi); int tt_global_seq_print_text(struct seq_file *seq, void *offset); void tt_global_del_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, const char *message); @@ -42,25 +43,23 @@ void tt_global_del(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *addr, const char *message, bool roaming); struct orig_node *transtable_search(struct bat_priv *bat_priv, - const uint8_t *addr); + const uint8_t *src, const uint8_t *addr); void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *tt_buff, uint8_t tt_num_changes); uint16_t tt_local_crc(struct bat_priv *bat_priv); uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node); void tt_free(struct bat_priv *bat_priv); -int send_tt_request(struct bat_priv *bat_priv, - struct orig_node *dst_orig_node, uint8_t hvn, - uint16_t tt_crc, bool full_table); bool send_tt_response(struct bat_priv *bat_priv, struct tt_query_packet *tt_request); -void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, - uint16_t tt_num_changes, uint8_t ttvn, - struct tt_change *tt_change); bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr); void handle_tt_response(struct bat_priv *bat_priv, struct tt_query_packet *tt_response); void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client, struct orig_node *orig_node); void tt_commit_changes(struct bat_priv *bat_priv); +bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst); +void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes, + uint8_t ttvn, uint16_t tt_crc); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 25bd1db35370..e9eb043719ac 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -57,7 +57,7 @@ struct hard_iface { * @batman_seqno_reset: time when the batman seqno window was reset * @gw_flags: flags related to gateway class * @flags: for now only VIS_SERVER flag - * @last_real_seqno: last and best known squence number + * @last_real_seqno: last and best known sequence number * @last_ttl: ttl of last received packet * @last_bcast_seqno: last broadcast sequence number received by this host * @@ -146,6 +146,7 @@ struct bat_priv { atomic_t aggregated_ogms; /* boolean */ atomic_t bonding; /* boolean */ atomic_t fragmentation; /* boolean */ + atomic_t ap_isolation; /* boolean */ atomic_t vis_mode; /* VIS_TYPE_* */ atomic_t gw_mode; /* GW_MODE_* */ atomic_t gw_sel_class; /* uint */ @@ -156,7 +157,7 @@ struct bat_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; - atomic_t ttvn; /* tranlation table version number */ + atomic_t ttvn; /* translation table version number */ atomic_t tt_ogm_append_cnt; atomic_t tt_local_changes; /* changes registered in a OGM interval */ /* The tt_poss_change flag is used to detect an ongoing roaming phase. @@ -221,24 +222,24 @@ struct socket_packet { struct icmp_packet_rr icmp_packet; }; -struct tt_local_entry { +struct tt_common_entry { uint8_t addr[ETH_ALEN]; - unsigned long last_seen; + struct hlist_node hash_entry; uint16_t flags; atomic_t refcount; struct rcu_head rcu; - struct hlist_node hash_entry; +}; + +struct tt_local_entry { + struct tt_common_entry common; + unsigned long last_seen; }; struct tt_global_entry { - uint8_t addr[ETH_ALEN]; + struct tt_common_entry common; struct orig_node *orig_node; uint8_t ttvn; - uint16_t flags; /* only TT_GLOBAL_ROAM is used */ unsigned long roam_at; /* time at which TT_GLOBAL_ROAM was set */ - atomic_t refcount; - struct rcu_head rcu; - struct hlist_node hash_entry; /* entry in the global table */ }; struct tt_change_node { diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 32b125fb3d3b..07d1c1da89dd 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -299,8 +299,10 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) goto find_router; } - /* check for tt host - increases orig_node refcount */ - orig_node = transtable_search(bat_priv, ethhdr->h_dest); + /* check for tt host - increases orig_node refcount. + * returns NULL in case of AP isolation */ + orig_node = transtable_search(bat_priv, ethhdr->h_source, + ethhdr->h_dest); find_router: /** diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 62f54b954625..8fd5535544b9 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -24,7 +24,7 @@ #include "packet.h" -#define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */ +#define FRAG_TIMEOUT 10000 /* purge frag list entries after time in ms */ #define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 8a1b98589d76..cc3b9f2f3b5d 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -66,7 +66,7 @@ static int vis_info_cmp(const struct hlist_node *node, const void *data2) /* hash function to choose an entry in a hash table of given size */ /* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ -static int vis_info_choose(const void *data, int size) +static uint32_t vis_info_choose(const void *data, uint32_t size) { const struct vis_info *vis_info = data; const struct vis_packet *packet; @@ -96,7 +96,7 @@ static struct vis_info *vis_hash_find(struct bat_priv *bat_priv, struct hlist_head *head; struct hlist_node *node; struct vis_info *vis_info, *vis_info_tmp = NULL; - int index; + uint32_t index; if (!hash) return NULL; @@ -131,7 +131,7 @@ static void vis_data_insert_interface(const uint8_t *interface, return; } - /* its a new address, add it to the list */ + /* it's a new address, add it to the list */ entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return; @@ -202,7 +202,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) HLIST_HEAD(vis_if_list); struct if_list_entry *entry; struct hlist_node *pos, *n; - int i, j, ret = 0; + uint32_t i; + int j, ret = 0; int vis_server = atomic_read(&bat_priv->vis_mode); size_t buff_pos, buf_size; char *buff; @@ -465,7 +466,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, /* try to add it */ hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, info, &info->hash_entry); - if (hash_added < 0) { + if (hash_added != 0) { /* did not work (for some reason) */ kref_put(&info->refcount, free_info); info = NULL; @@ -556,7 +557,8 @@ static int find_best_vis_server(struct bat_priv *bat_priv, struct hlist_head *head; struct orig_node *orig_node; struct vis_packet *packet; - int best_tq = -1, i; + int best_tq = -1; + uint32_t i; packet = (struct vis_packet *)info->skb_packet->data; @@ -607,8 +609,9 @@ static int generate_vis_packet(struct bat_priv *bat_priv) struct vis_info *info = bat_priv->my_vis_info; struct vis_packet *packet = (struct vis_packet *)info->skb_packet->data; struct vis_info_entry *entry; - struct tt_local_entry *tt_local_entry; - int best_tq = -1, i; + struct tt_common_entry *tt_common_entry; + int best_tq = -1; + uint32_t i; info->first_seen = jiffies; packet->vis_type = atomic_read(&bat_priv->vis_mode); @@ -669,13 +672,13 @@ next: head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_local_entry, node, head, + hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { entry = (struct vis_info_entry *) skb_put(info->skb_packet, sizeof(*entry)); memset(entry->src, 0, ETH_ALEN); - memcpy(entry->dest, tt_local_entry->addr, ETH_ALEN); + memcpy(entry->dest, tt_common_entry->addr, ETH_ALEN); entry->quality = 0; /* 0 means TT */ packet->entries++; @@ -696,7 +699,7 @@ unlock: * held */ static void purge_vis_packets(struct bat_priv *bat_priv) { - int i; + uint32_t i; struct hashtable_t *hash = bat_priv->vis_hash; struct hlist_node *node, *node_tmp; struct hlist_head *head; @@ -733,7 +736,7 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, struct sk_buff *skb; struct hard_iface *hard_iface; uint8_t dstaddr[ETH_ALEN]; - int i; + uint32_t i; packet = (struct vis_packet *)info->skb_packet->data; @@ -887,10 +890,8 @@ int vis_init(struct bat_priv *bat_priv) } bat_priv->my_vis_info = kmalloc(MAX_VIS_PACKET_SIZE, GFP_ATOMIC); - if (!bat_priv->my_vis_info) { - pr_err("Can't initialize vis packet\n"); + if (!bat_priv->my_vis_info) goto err; - } bat_priv->my_vis_info->skb_packet = dev_alloc_skb(sizeof(*packet) + MAX_VIS_PACKET_SIZE + @@ -920,7 +921,7 @@ int vis_init(struct bat_priv *bat_priv) hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, bat_priv->my_vis_info, &bat_priv->my_vis_info->hash_entry); - if (hash_added < 0) { + if (hash_added != 0) { pr_err("Can't add own vis packet into hash\n"); /* not in hash, need to remove it manually. */ kref_put(&bat_priv->my_vis_info->refcount, free_info); diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index bfb3dc03c9de..9ec85eb8853d 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -6,7 +6,11 @@ menuconfig BT tristate "Bluetooth subsystem support" depends on NET && !S390 depends on RFKILL || !RFKILL + select CRC16 select CRYPTO + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_ECB help Bluetooth is low-cost, low-power, short-range wireless technology. It was designed as a replacement for cables and other short-range @@ -15,10 +19,12 @@ menuconfig BT Bluetooth can be found at <http://www.bluetooth.com/>. Linux Bluetooth subsystem consist of several layers: - Bluetooth Core (HCI device and connection manager, scheduler) + Bluetooth Core + HCI device and connection manager, scheduler + SCO audio links + L2CAP (Logical Link Control and Adaptation Protocol) + SMP (Security Manager Protocol) on LE (Low Energy) links HCI Device drivers (Interface to the hardware) - SCO Module (SCO audio links) - L2CAP Module (Logical Link Control and Adaptation Protocol) RFCOMM Module (RFCOMM Protocol) BNEP Module (Bluetooth Network Encapsulation Protocol) CMTP Module (CAPI Message Transport Protocol) @@ -33,31 +39,6 @@ menuconfig BT to Bluetooth kernel modules are provided in the BlueZ packages. For more information, see <http://www.bluez.org/>. -if BT != n - -config BT_L2CAP - bool "L2CAP protocol support" - select CRC16 - select CRYPTO - select CRYPTO_BLKCIPHER - select CRYPTO_AES - select CRYPTO_ECB - help - L2CAP (Logical Link Control and Adaptation Protocol) provides - connection oriented and connection-less data transport. L2CAP - support is required for most Bluetooth applications. - - Also included is support for SMP (Security Manager Protocol) which - is the security layer on top of LE (Low Energy) links. - -config BT_SCO - bool "SCO links support" - help - SCO link provides voice transport over Bluetooth. SCO support is - required for voice applications like Headset and Audio. - -endif - source "net/bluetooth/rfcomm/Kconfig" source "net/bluetooth/bnep/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 9b67f3d08fa4..2dc5a5700f53 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -8,6 +8,5 @@ obj-$(CONFIG_BT_BNEP) += bnep/ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ -bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o -bluetooth-$(CONFIG_BT_L2CAP) += l2cap_core.o l2cap_sock.o smp.o -bluetooth-$(CONFIG_BT_SCO) += sco.o +bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ + hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 117e0d161780..ef92864ac625 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -156,17 +156,17 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto, void bt_sock_link(struct bt_sock_list *l, struct sock *sk) { - write_lock_bh(&l->lock); + write_lock(&l->lock); sk_add_node(sk, &l->head); - write_unlock_bh(&l->lock); + write_unlock(&l->lock); } EXPORT_SYMBOL(bt_sock_link); void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) { - write_lock_bh(&l->lock); + write_lock(&l->lock); sk_del_node_init(sk); - write_unlock_bh(&l->lock); + write_unlock(&l->lock); } EXPORT_SYMBOL(bt_sock_unlink); @@ -199,15 +199,14 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) BT_DBG("parent %p", parent); - local_bh_disable(); list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); - bh_lock_sock(sk); + lock_sock(sk); /* FIXME: Is this check still needed */ if (sk->sk_state == BT_CLOSED) { - bh_unlock_sock(sk); + release_sock(sk); bt_accept_unlink(sk); continue; } @@ -218,14 +217,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) if (newsock) sock_graft(sk, newsock); - bh_unlock_sock(sk); - local_bh_enable(); + release_sock(sk); return sk; } - bh_unlock_sock(sk); + release_sock(sk); } - local_bh_enable(); return NULL; } @@ -349,7 +346,7 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } chunk = min_t(unsigned int, skb->len, size); - if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { + if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, chunk)) { skb_queue_head(&sk->sk_receive_queue, skb); if (!copied) copied = -EFAULT; @@ -361,7 +358,33 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); if (!(flags & MSG_PEEK)) { - skb_pull(skb, chunk); + int skb_len = skb_headlen(skb); + + if (chunk <= skb_len) { + __skb_pull(skb, chunk); + } else { + struct sk_buff *frag; + + __skb_pull(skb, skb_len); + chunk -= skb_len; + + skb_walk_frags(skb, frag) { + if (chunk <= frag->len) { + /* Pulling partial data */ + skb->len -= chunk; + skb->data_len -= chunk; + __skb_pull(frag, chunk); + break; + } else if (frag->len) { + /* Pulling all frag data */ + chunk -= frag->len; + skb->len -= frag->len; + skb->data_len -= frag->len; + __skb_pull(frag, frag->len); + } + } + } + if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); break; diff --git a/net/bluetooth/bnep/Kconfig b/net/bluetooth/bnep/Kconfig index 35158b036d54..71791fc9f6b1 100644 --- a/net/bluetooth/bnep/Kconfig +++ b/net/bluetooth/bnep/Kconfig @@ -1,6 +1,6 @@ config BT_BNEP tristate "BNEP protocol support" - depends on BT && BT_L2CAP + depends on BT select CRC32 help BNEP (Bluetooth Network Encapsulation Protocol) is Ethernet diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index d9edfe8bf9d6..a779ec703323 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -56,8 +56,8 @@ #define VERSION "1.3" -static int compress_src = 1; -static int compress_dst = 1; +static bool compress_src = true; +static bool compress_dst = true; static LIST_HEAD(bnep_session_list); static DECLARE_RWSEM(bnep_session_sem); @@ -65,31 +65,24 @@ static DECLARE_RWSEM(bnep_session_sem); static struct bnep_session *__bnep_get_session(u8 *dst) { struct bnep_session *s; - struct list_head *p; BT_DBG(""); - list_for_each(p, &bnep_session_list) { - s = list_entry(p, struct bnep_session, list); + list_for_each_entry(s, &bnep_session_list, list) if (!compare_ether_addr(dst, s->eh.h_source)) return s; - } + return NULL; } static void __bnep_link_session(struct bnep_session *s) { - /* It's safe to call __module_get() here because sessions are added - by the socket layer which has to hold the reference to this module. - */ - __module_get(THIS_MODULE); list_add(&s->list, &bnep_session_list); } static void __bnep_unlink_session(struct bnep_session *s) { list_del(&s->list); - module_put(THIS_MODULE); } static int bnep_send(struct bnep_session *s, void *data, size_t len) @@ -492,7 +485,10 @@ static int bnep_session(void *arg) /* RX */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - bnep_rx_frame(s, skb); + if (!skb_linearize(skb)) + bnep_rx_frame(s, skb); + else + kfree_skb(skb); } if (sk->sk_state != BT_CONNECTED) @@ -527,6 +523,7 @@ static int bnep_session(void *arg) up_write(&bnep_session_sem); free_netdev(dev); + module_put_and_exit(0); return 0; } @@ -613,9 +610,11 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) __bnep_link_session(s); + __module_get(THIS_MODULE); s->task = kthread_run(bnep_session, s, "kbnepd %s", dev->name); if (IS_ERR(s->task)) { /* Session thread start failed, gotta cleanup. */ + module_put(THIS_MODULE); unregister_netdev(dev); __bnep_unlink_session(s); err = PTR_ERR(s->task); @@ -664,17 +663,14 @@ static void __bnep_copy_ci(struct bnep_conninfo *ci, struct bnep_session *s) int bnep_get_connlist(struct bnep_connlist_req *req) { - struct list_head *p; + struct bnep_session *s; int err = 0, n = 0; down_read(&bnep_session_sem); - list_for_each(p, &bnep_session_list) { - struct bnep_session *s; + list_for_each_entry(s, &bnep_session_list, list) { struct bnep_conninfo ci; - s = list_entry(p, struct bnep_session, list); - __bnep_copy_ci(&ci, s); if (copy_to_user(req->ci, &ci, sizeof(ci))) { diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index d4f5dff7c955..bc4086480d97 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -217,7 +217,7 @@ static const struct net_device_ops bnep_netdev_ops = { .ndo_stop = bnep_net_close, .ndo_start_xmit = bnep_net_xmit, .ndo_validate_addr = eth_validate_addr, - .ndo_set_multicast_list = bnep_net_set_mc_list, + .ndo_set_rx_mode = bnep_net_set_mc_list, .ndo_set_mac_address = bnep_net_set_mac_addr, .ndo_tx_timeout = bnep_net_timeout, .ndo_change_mtu = eth_change_mtu, diff --git a/net/bluetooth/cmtp/Kconfig b/net/bluetooth/cmtp/Kconfig index d6b0382f6f3a..94cbf42ce155 100644 --- a/net/bluetooth/cmtp/Kconfig +++ b/net/bluetooth/cmtp/Kconfig @@ -1,6 +1,6 @@ config BT_CMTP tristate "CMTP protocol support" - depends on BT && BT_L2CAP && ISDN_CAPI + depends on BT && ISDN_CAPI help CMTP (CAPI Message Transport Protocol) is a transport layer for CAPI messages. CMTP is required for the Bluetooth Common diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 521baa4fe835..6c9c1fd601ca 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -53,28 +53,24 @@ static LIST_HEAD(cmtp_session_list); static struct cmtp_session *__cmtp_get_session(bdaddr_t *bdaddr) { struct cmtp_session *session; - struct list_head *p; BT_DBG(""); - list_for_each(p, &cmtp_session_list) { - session = list_entry(p, struct cmtp_session, list); + list_for_each_entry(session, &cmtp_session_list, list) if (!bacmp(bdaddr, &session->bdaddr)) return session; - } + return NULL; } static void __cmtp_link_session(struct cmtp_session *session) { - __module_get(THIS_MODULE); list_add(&session->list, &cmtp_session_list); } static void __cmtp_unlink_session(struct cmtp_session *session) { list_del(&session->list); - module_put(THIS_MODULE); } static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci) @@ -302,7 +298,10 @@ static int cmtp_session(void *arg) while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - cmtp_recv_frame(session, skb); + if (!skb_linearize(skb)) + cmtp_recv_frame(session, skb); + else + kfree_skb(skb); } cmtp_process_transmit(session); @@ -324,6 +323,7 @@ static int cmtp_session(void *arg) up_write(&cmtp_session_sem); kfree(session); + module_put_and_exit(0); return 0; } @@ -373,9 +373,11 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) __cmtp_link_session(session); + __module_get(THIS_MODULE); session->task = kthread_run(cmtp_session, session, "kcmtpd_ctr_%d", session->num); if (IS_ERR(session->task)) { + module_put(THIS_MODULE); err = PTR_ERR(session->task); goto unlink; } @@ -428,19 +430,16 @@ int cmtp_del_connection(struct cmtp_conndel_req *req) int cmtp_get_connlist(struct cmtp_connlist_req *req) { - struct list_head *p; + struct cmtp_session *session; int err = 0, n = 0; BT_DBG(""); down_read(&cmtp_session_sem); - list_for_each(p, &cmtp_session_list) { - struct cmtp_session *session; + list_for_each_entry(session, &cmtp_session_list, list) { struct cmtp_conninfo ci; - session = list_entry(p, struct cmtp_session, list); - __cmtp_copy_session(session, &ci); if (copy_to_user(req->ci, &ci, sizeof(ci))) { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ea7f031f3b04..3db432473ad5 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -56,15 +56,15 @@ static void hci_le_connect(struct hci_conn *conn) conn->sec_level = BT_SECURITY_LOW; memset(&cp, 0, sizeof(cp)); - cp.scan_interval = cpu_to_le16(0x0004); - cp.scan_window = cpu_to_le16(0x0004); + cp.scan_interval = cpu_to_le16(0x0060); + cp.scan_window = cpu_to_le16(0x0030); bacpy(&cp.peer_addr, &conn->dst); cp.peer_addr_type = conn->dst_type; - cp.conn_interval_min = cpu_to_le16(0x0008); - cp.conn_interval_max = cpu_to_le16(0x0100); - cp.supervision_timeout = cpu_to_le16(0x0064); - cp.min_ce_len = cpu_to_le16(0x0001); - cp.max_ce_len = cpu_to_le16(0x0001); + cp.conn_interval_min = cpu_to_le16(0x0028); + cp.conn_interval_max = cpu_to_le16(0x0038); + cp.supervision_timeout = cpu_to_le16(0x002a); + cp.min_ce_len = cpu_to_le16(0x0000); + cp.max_ce_len = cpu_to_le16(0x0000); hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp); } @@ -123,7 +123,7 @@ static void hci_acl_connect_cancel(struct hci_conn *conn) BT_DBG("%p", conn); - if (conn->hdev->hci_ver < 2) + if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) return; bacpy(&cp.bdaddr, &conn->dst); @@ -218,7 +218,7 @@ void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __u8 rand[8], cp.handle = cpu_to_le16(conn->handle); memcpy(cp.ltk, ltk, sizeof(cp.ltk)); cp.ediv = ediv; - memcpy(cp.rand, rand, sizeof(rand)); + memcpy(cp.rand, rand, sizeof(cp.rand)); hci_send_cmd(hdev, HCI_OP_LE_START_ENC, sizeof(cp), &cp); } @@ -275,9 +275,10 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status) } } -static void hci_conn_timeout(unsigned long arg) +static void hci_conn_timeout(struct work_struct *work) { - struct hci_conn *conn = (void *) arg; + struct hci_conn *conn = container_of(work, struct hci_conn, + disc_work.work); struct hci_dev *hdev = conn->hdev; __u8 reason; @@ -311,6 +312,42 @@ static void hci_conn_timeout(unsigned long arg) hci_dev_unlock(hdev); } +/* Enter sniff mode */ +static void hci_conn_enter_sniff_mode(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p mode %d", conn, conn->mode); + + if (test_bit(HCI_RAW, &hdev->flags)) + return; + + if (!lmp_sniff_capable(hdev) || !lmp_sniff_capable(conn)) + return; + + if (conn->mode != HCI_CM_ACTIVE || !(conn->link_policy & HCI_LP_SNIFF)) + return; + + if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) { + struct hci_cp_sniff_subrate cp; + cp.handle = cpu_to_le16(conn->handle); + cp.max_latency = cpu_to_le16(0); + cp.min_remote_timeout = cpu_to_le16(0); + cp.min_local_timeout = cpu_to_le16(0); + hci_send_cmd(hdev, HCI_OP_SNIFF_SUBRATE, sizeof(cp), &cp); + } + + if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { + struct hci_cp_sniff_mode cp; + cp.handle = cpu_to_le16(conn->handle); + cp.max_interval = cpu_to_le16(hdev->sniff_max_interval); + cp.min_interval = cpu_to_le16(hdev->sniff_min_interval); + cp.attempt = cpu_to_le16(4); + cp.timeout = cpu_to_le16(1); + hci_send_cmd(hdev, HCI_OP_SNIFF_MODE, sizeof(cp), &cp); + } +} + static void hci_conn_idle(unsigned long arg) { struct hci_conn *conn = (void *) arg; @@ -325,12 +362,8 @@ static void hci_conn_auto_accept(unsigned long arg) struct hci_conn *conn = (void *) arg; struct hci_dev *hdev = conn->hdev; - hci_dev_lock(hdev); - hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_REPLY, sizeof(conn->dst), &conn->dst); - - hci_dev_unlock(hdev); } struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) @@ -374,7 +407,9 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) skb_queue_head_init(&conn->data_q); - setup_timer(&conn->disc_timer, hci_conn_timeout, (unsigned long)conn); + INIT_LIST_HEAD(&conn->chan_list);; + + INIT_DELAYED_WORK(&conn->disc_work, hci_conn_timeout); setup_timer(&conn->idle_timer, hci_conn_idle, (unsigned long)conn); setup_timer(&conn->auto_accept_timer, hci_conn_auto_accept, (unsigned long) conn); @@ -383,8 +418,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) hci_dev_hold(hdev); - tasklet_disable(&hdev->tx_task); - hci_conn_hash_add(hdev, conn); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); @@ -393,8 +426,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) hci_conn_init_sysfs(conn); - tasklet_enable(&hdev->tx_task); - return conn; } @@ -406,7 +437,7 @@ int hci_conn_del(struct hci_conn *conn) del_timer(&conn->idle_timer); - del_timer(&conn->disc_timer); + cancel_delayed_work_sync(&conn->disc_work); del_timer(&conn->auto_accept_timer); @@ -430,14 +461,13 @@ int hci_conn_del(struct hci_conn *conn) } } - tasklet_disable(&hdev->tx_task); + + hci_chan_list_flush(conn); hci_conn_hash_del(hdev, conn); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); - tasklet_enable(&hdev->tx_task); - skb_queue_purge(&conn->data_q); hci_conn_put_device(conn); @@ -453,16 +483,13 @@ int hci_conn_del(struct hci_conn *conn) struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) { int use_src = bacmp(src, BDADDR_ANY); - struct hci_dev *hdev = NULL; - struct list_head *p; + struct hci_dev *hdev = NULL, *d; BT_DBG("%s -> %s", batostr(src), batostr(dst)); - read_lock_bh(&hci_dev_list_lock); - - list_for_each(p, &hci_dev_list) { - struct hci_dev *d = list_entry(p, struct hci_dev, list); + read_lock(&hci_dev_list_lock); + list_for_each_entry(d, &hci_dev_list, list) { if (!test_bit(HCI_UP, &d->flags) || test_bit(HCI_RAW, &d->flags)) continue; @@ -485,7 +512,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) if (hdev) hdev = hci_dev_hold(hdev); - read_unlock_bh(&hci_dev_list_lock); + read_unlock(&hci_dev_list_lock); return hdev; } EXPORT_SYMBOL(hci_get_route); @@ -766,60 +793,18 @@ timer: jiffies + msecs_to_jiffies(hdev->idle_timeout)); } -/* Enter sniff mode */ -void hci_conn_enter_sniff_mode(struct hci_conn *conn) -{ - struct hci_dev *hdev = conn->hdev; - - BT_DBG("conn %p mode %d", conn, conn->mode); - - if (test_bit(HCI_RAW, &hdev->flags)) - return; - - if (!lmp_sniff_capable(hdev) || !lmp_sniff_capable(conn)) - return; - - if (conn->mode != HCI_CM_ACTIVE || !(conn->link_policy & HCI_LP_SNIFF)) - return; - - if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) { - struct hci_cp_sniff_subrate cp; - cp.handle = cpu_to_le16(conn->handle); - cp.max_latency = cpu_to_le16(0); - cp.min_remote_timeout = cpu_to_le16(0); - cp.min_local_timeout = cpu_to_le16(0); - hci_send_cmd(hdev, HCI_OP_SNIFF_SUBRATE, sizeof(cp), &cp); - } - - if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { - struct hci_cp_sniff_mode cp; - cp.handle = cpu_to_le16(conn->handle); - cp.max_interval = cpu_to_le16(hdev->sniff_max_interval); - cp.min_interval = cpu_to_le16(hdev->sniff_min_interval); - cp.attempt = cpu_to_le16(4); - cp.timeout = cpu_to_le16(1); - hci_send_cmd(hdev, HCI_OP_SNIFF_MODE, sizeof(cp), &cp); - } -} - /* Drop all connection on the device */ void hci_conn_hash_flush(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; - struct list_head *p; + struct hci_conn *c; BT_DBG("hdev %s", hdev->name); - p = h->list.next; - while (p != &h->list) { - struct hci_conn *c; - - c = list_entry(p, struct hci_conn, list); - p = p->next; - + list_for_each_entry_rcu(c, &h->list, list) { c->state = BT_CLOSED; - hci_proto_disconn_cfm(c, 0x16); + hci_proto_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM); hci_conn_del(c); } } @@ -855,10 +840,10 @@ EXPORT_SYMBOL(hci_conn_put_device); int hci_get_conn_list(void __user *arg) { + register struct hci_conn *c; struct hci_conn_list_req req, *cl; struct hci_conn_info *ci; struct hci_dev *hdev; - struct list_head *p; int n = 0, size, err; if (copy_from_user(&req, arg, sizeof(req))) @@ -881,11 +866,8 @@ int hci_get_conn_list(void __user *arg) ci = cl->conn_info; - hci_dev_lock_bh(hdev); - list_for_each(p, &hdev->conn_hash.list) { - register struct hci_conn *c; - c = list_entry(p, struct hci_conn, list); - + hci_dev_lock(hdev); + list_for_each_entry(c, &hdev->conn_hash.list, list) { bacpy(&(ci + n)->bdaddr, &c->dst); (ci + n)->handle = c->handle; (ci + n)->type = c->type; @@ -895,7 +877,7 @@ int hci_get_conn_list(void __user *arg) if (++n >= req.conn_num) break; } - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); cl->dev_id = hdev->id; cl->conn_num = n; @@ -919,7 +901,7 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg) if (copy_from_user(&req, arg, sizeof(req))) return -EFAULT; - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, req.type, &req.bdaddr); if (conn) { bacpy(&ci.bdaddr, &conn->dst); @@ -929,7 +911,7 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg) ci.state = conn->state; ci.link_mode = conn->link_mode; } - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); if (!conn) return -ENOENT; @@ -945,14 +927,60 @@ int hci_get_auth_info(struct hci_dev *hdev, void __user *arg) if (copy_from_user(&req, arg, sizeof(req))) return -EFAULT; - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &req.bdaddr); if (conn) req.type = conn->auth_type; - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); if (!conn) return -ENOENT; return copy_to_user(arg, &req, sizeof(req)) ? -EFAULT : 0; } + +struct hci_chan *hci_chan_create(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct hci_chan *chan; + + BT_DBG("%s conn %p", hdev->name, conn); + + chan = kzalloc(sizeof(struct hci_chan), GFP_ATOMIC); + if (!chan) + return NULL; + + chan->conn = conn; + skb_queue_head_init(&chan->data_q); + + list_add_rcu(&chan->list, &conn->chan_list); + + return chan; +} + +int hci_chan_del(struct hci_chan *chan) +{ + struct hci_conn *conn = chan->conn; + struct hci_dev *hdev = conn->hdev; + + BT_DBG("%s conn %p chan %p", hdev->name, conn, chan); + + list_del_rcu(&chan->list); + + synchronize_rcu(); + + skb_queue_purge(&chan->data_q); + kfree(chan); + + return 0; +} + +void hci_chan_list_flush(struct hci_conn *conn) +{ + struct hci_chan *chan; + + BT_DBG("conn %p", conn); + + list_for_each_entry_rcu(chan, &conn->chan_list, list) + hci_chan_del(chan); +} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 56943add45cc..845da3ee56a0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1,6 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (C) 2011 ProFUSION Embedded Systems Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -54,11 +55,11 @@ #define AUTO_OFF_TIMEOUT 2000 -static void hci_cmd_task(unsigned long arg); -static void hci_rx_task(unsigned long arg); -static void hci_tx_task(unsigned long arg); +int enable_hs; -static DEFINE_RWLOCK(hci_task_lock); +static void hci_rx_work(struct work_struct *work); +static void hci_cmd_work(struct work_struct *work); +static void hci_tx_work(struct work_struct *work); /* HCI device list */ LIST_HEAD(hci_dev_list); @@ -68,10 +69,6 @@ DEFINE_RWLOCK(hci_dev_list_lock); LIST_HEAD(hci_cb_list); DEFINE_RWLOCK(hci_cb_list_lock); -/* HCI protocols */ -#define HCI_MAX_PROTO 2 -struct hci_proto *hci_proto[HCI_MAX_PROTO]; - /* HCI notifiers list */ static ATOMIC_NOTIFIER_HEAD(hci_notifier); @@ -190,33 +187,20 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); } -static void hci_init_req(struct hci_dev *hdev, unsigned long opt) +static void bredr_init(struct hci_dev *hdev) { struct hci_cp_delete_stored_link_key cp; - struct sk_buff *skb; __le16 param; __u8 flt_type; - BT_DBG("%s %ld", hdev->name, opt); - - /* Driver initialization */ - - /* Special commands */ - while ((skb = skb_dequeue(&hdev->driver_init))) { - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - skb->dev = (void *) hdev; - - skb_queue_tail(&hdev->cmd_q, skb); - tasklet_schedule(&hdev->cmd_task); - } - skb_queue_purge(&hdev->driver_init); + hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; /* Mandatory initialization */ /* Reset */ if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) { - set_bit(HCI_RESET, &hdev->flags); - hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); + set_bit(HCI_RESET, &hdev->flags); + hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); } /* Read Local Supported Features */ @@ -228,18 +212,6 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Read Buffer Size (ACL mtu, max pkt, etc.) */ hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL); -#if 0 - /* Host buffer size */ - { - struct hci_cp_host_buffer_size cp; - cp.acl_mtu = cpu_to_le16(HCI_MAX_ACL_SIZE); - cp.sco_mtu = HCI_MAX_SCO_SIZE; - cp.acl_max_pkt = cpu_to_le16(0xffff); - cp.sco_max_pkt = cpu_to_le16(0xffff); - hci_send_cmd(hdev, HCI_OP_HOST_BUFFER_SIZE, sizeof(cp), &cp); - } -#endif - /* Read BD Address */ hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL); @@ -267,6 +239,51 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); } +static void amp_init(struct hci_dev *hdev) +{ + hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; + + /* Reset */ + hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); + + /* Read Local Version */ + hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); +} + +static void hci_init_req(struct hci_dev *hdev, unsigned long opt) +{ + struct sk_buff *skb; + + BT_DBG("%s %ld", hdev->name, opt); + + /* Driver initialization */ + + /* Special commands */ + while ((skb = skb_dequeue(&hdev->driver_init))) { + bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; + skb->dev = (void *) hdev; + + skb_queue_tail(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); + } + skb_queue_purge(&hdev->driver_init); + + switch (hdev->dev_type) { + case HCI_BREDR: + bredr_init(hdev); + break; + + case HCI_AMP: + amp_init(hdev); + break; + + default: + BT_ERR("Unknown device type %d", hdev->dev_type); + break; + } + +} + static void hci_le_init_req(struct hci_dev *hdev, unsigned long opt) { BT_DBG("%s", hdev->name); @@ -319,8 +336,7 @@ static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) * Device is held on return. */ struct hci_dev *hci_dev_get(int index) { - struct hci_dev *hdev = NULL; - struct list_head *p; + struct hci_dev *hdev = NULL, *d; BT_DBG("%d", index); @@ -328,8 +344,7 @@ struct hci_dev *hci_dev_get(int index) return NULL; read_lock(&hci_dev_list_lock); - list_for_each(p, &hci_dev_list) { - struct hci_dev *d = list_entry(p, struct hci_dev, list); + list_for_each_entry(d, &hci_dev_list, list) { if (d->id == index) { hdev = hci_dev_hold(d); break; @@ -445,14 +460,14 @@ int hci_inquiry(void __user *arg) if (!hdev) return -ENODEV; - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { inquiry_cache_flush(hdev); do_inquiry = 1; } - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); timeo = ir.length * msecs_to_jiffies(2000); @@ -474,9 +489,9 @@ int hci_inquiry(void __user *arg) goto done; } - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); ir.num_rsp = inquiry_cache_dump(hdev, max_rsp, buf); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); BT_DBG("num_rsp %d", ir.num_rsp); @@ -523,8 +538,9 @@ int hci_dev_open(__u16 dev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) set_bit(HCI_RAW, &hdev->flags); - /* Treat all non BR/EDR controllers as raw devices for now */ - if (hdev->dev_type != HCI_BREDR) + /* Treat all non BR/EDR controllers as raw devices if + enable_hs is not set */ + if (hdev->dev_type != HCI_BREDR && !enable_hs) set_bit(HCI_RAW, &hdev->flags); if (hdev->open(hdev)) { @@ -551,13 +567,16 @@ int hci_dev_open(__u16 dev) hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - if (!test_bit(HCI_SETUP, &hdev->flags)) - mgmt_powered(hdev->id, 1); + if (!test_bit(HCI_SETUP, &hdev->flags)) { + hci_dev_lock(hdev); + mgmt_powered(hdev, 1); + hci_dev_unlock(hdev); + } } else { /* Init failed, cleanup */ - tasklet_kill(&hdev->rx_task); - tasklet_kill(&hdev->tx_task); - tasklet_kill(&hdev->cmd_task); + flush_work(&hdev->tx_work); + flush_work(&hdev->cmd_work); + flush_work(&hdev->rx_work); skb_queue_purge(&hdev->cmd_q); skb_queue_purge(&hdev->rx_q); @@ -593,14 +612,25 @@ static int hci_dev_do_close(struct hci_dev *hdev) return 0; } - /* Kill RX and TX tasks */ - tasklet_kill(&hdev->rx_task); - tasklet_kill(&hdev->tx_task); + /* Flush RX and TX works */ + flush_work(&hdev->tx_work); + flush_work(&hdev->rx_work); + + if (hdev->discov_timeout > 0) { + cancel_delayed_work(&hdev->discov_off); + hdev->discov_timeout = 0; + } + + if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->flags)) + cancel_delayed_work(&hdev->power_off); + + if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->flags)) + cancel_delayed_work(&hdev->service_cache); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_notify(hdev, HCI_DEV_DOWN); @@ -617,8 +647,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) clear_bit(HCI_INIT, &hdev->flags); } - /* Kill cmd task */ - tasklet_kill(&hdev->cmd_task); + /* flush cmd work */ + flush_work(&hdev->cmd_work); /* Drop queues */ skb_queue_purge(&hdev->rx_q); @@ -636,7 +666,9 @@ static int hci_dev_do_close(struct hci_dev *hdev) * and no tasks are scheduled. */ hdev->close(hdev); - mgmt_powered(hdev->id, 0); + hci_dev_lock(hdev); + mgmt_powered(hdev, 0); + hci_dev_unlock(hdev); /* Clear flags */ hdev->flags = 0; @@ -670,7 +702,6 @@ int hci_dev_reset(__u16 dev) return -ENODEV; hci_req_lock(hdev); - tasklet_disable(&hdev->tx_task); if (!test_bit(HCI_UP, &hdev->flags)) goto done; @@ -679,10 +710,10 @@ int hci_dev_reset(__u16 dev) skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); if (hdev->flush) hdev->flush(hdev); @@ -695,7 +726,6 @@ int hci_dev_reset(__u16 dev) msecs_to_jiffies(HCI_INIT_TIMEOUT)); done: - tasklet_enable(&hdev->tx_task); hci_req_unlock(hdev); hci_dev_put(hdev); return ret; @@ -794,9 +824,9 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) int hci_get_dev_list(void __user *arg) { + struct hci_dev *hdev; struct hci_dev_list_req *dl; struct hci_dev_req *dr; - struct list_head *p; int n = 0, size, err; __u16 dev_num; @@ -814,13 +844,10 @@ int hci_get_dev_list(void __user *arg) dr = dl->dev_req; - read_lock_bh(&hci_dev_list_lock); - list_for_each(p, &hci_dev_list) { - struct hci_dev *hdev; - - hdev = list_entry(p, struct hci_dev, list); - - hci_del_off_timer(hdev); + read_lock(&hci_dev_list_lock); + list_for_each_entry(hdev, &hci_dev_list, list) { + if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->flags)) + cancel_delayed_work(&hdev->power_off); if (!test_bit(HCI_MGMT, &hdev->flags)) set_bit(HCI_PAIRABLE, &hdev->flags); @@ -831,7 +858,7 @@ int hci_get_dev_list(void __user *arg) if (++n >= dev_num) break; } - read_unlock_bh(&hci_dev_list_lock); + read_unlock(&hci_dev_list_lock); dl->dev_num = n; size = sizeof(*dl) + n * sizeof(*dr); @@ -855,7 +882,8 @@ int hci_get_dev_info(void __user *arg) if (!hdev) return -ENODEV; - hci_del_off_timer(hdev); + if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->flags)) + cancel_delayed_work_sync(&hdev->power_off); if (!test_bit(HCI_MGMT, &hdev->flags)) set_bit(HCI_PAIRABLE, &hdev->flags); @@ -912,6 +940,7 @@ struct hci_dev *hci_alloc_dev(void) if (!hdev) return NULL; + hci_init_sysfs(hdev); skb_queue_head_init(&hdev->driver_init); return hdev; @@ -938,39 +967,41 @@ static void hci_power_on(struct work_struct *work) return; if (test_bit(HCI_AUTO_OFF, &hdev->flags)) - mod_timer(&hdev->off_timer, - jiffies + msecs_to_jiffies(AUTO_OFF_TIMEOUT)); + schedule_delayed_work(&hdev->power_off, + msecs_to_jiffies(AUTO_OFF_TIMEOUT)); if (test_and_clear_bit(HCI_SETUP, &hdev->flags)) - mgmt_index_added(hdev->id); + mgmt_index_added(hdev); } static void hci_power_off(struct work_struct *work) { - struct hci_dev *hdev = container_of(work, struct hci_dev, power_off); + struct hci_dev *hdev = container_of(work, struct hci_dev, + power_off.work); BT_DBG("%s", hdev->name); + clear_bit(HCI_AUTO_OFF, &hdev->flags); + hci_dev_close(hdev->id); } -static void hci_auto_off(unsigned long data) +static void hci_discov_off(struct work_struct *work) { - struct hci_dev *hdev = (struct hci_dev *) data; + struct hci_dev *hdev; + u8 scan = SCAN_PAGE; + + hdev = container_of(work, struct hci_dev, discov_off.work); BT_DBG("%s", hdev->name); - clear_bit(HCI_AUTO_OFF, &hdev->flags); + hci_dev_lock(hdev); - queue_work(hdev->workqueue, &hdev->power_off); -} + hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); -void hci_del_off_timer(struct hci_dev *hdev) -{ - BT_DBG("%s", hdev->name); + hdev->discov_timeout = 0; - clear_bit(HCI_AUTO_OFF, &hdev->flags); - del_timer(&hdev->off_timer); + hci_dev_unlock(hdev); } int hci_uuids_clear(struct hci_dev *hdev) @@ -1007,16 +1038,11 @@ int hci_link_keys_clear(struct hci_dev *hdev) struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) { - struct list_head *p; - - list_for_each(p, &hdev->link_keys) { - struct link_key *k; - - k = list_entry(p, struct link_key, list); + struct link_key *k; + list_for_each_entry(k, &hdev->link_keys, list) if (bacmp(bdaddr, &k->bdaddr) == 0) return k; - } return NULL; } @@ -1138,7 +1164,7 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, persistent = hci_persistent_key(hdev, conn, type, old_key_type); - mgmt_new_key(hdev->id, key, persistent); + mgmt_new_link_key(hdev, key, persistent); if (!persistent) { list_del(&key->list); @@ -1181,7 +1207,7 @@ int hci_add_ltk(struct hci_dev *hdev, int new_key, bdaddr_t *bdaddr, memcpy(id->rand, rand, sizeof(id->rand)); if (new_key) - mgmt_new_key(hdev->id, key, old_key_type); + mgmt_new_link_key(hdev, key, old_key_type); return 0; } @@ -1209,7 +1235,7 @@ static void hci_cmd_timer(unsigned long arg) BT_ERR("%s command tx timeout", hdev->name); atomic_set(&hdev->cmd_cnt, 1); - tasklet_schedule(&hdev->cmd_task); + queue_work(hdev->workqueue, &hdev->cmd_work); } struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev, @@ -1279,16 +1305,11 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *hash, struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr) { - struct list_head *p; - - list_for_each(p, &hdev->blacklist) { - struct bdaddr_list *b; - - b = list_entry(p, struct bdaddr_list, list); + struct bdaddr_list *b; + list_for_each_entry(b, &hdev->blacklist, list) if (bacmp(bdaddr, &b->bdaddr) == 0) return b; - } return NULL; } @@ -1312,64 +1333,45 @@ int hci_blacklist_clear(struct hci_dev *hdev) int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct bdaddr_list *entry; - int err; if (bacmp(bdaddr, BDADDR_ANY) == 0) return -EBADF; - hci_dev_lock_bh(hdev); - - if (hci_blacklist_lookup(hdev, bdaddr)) { - err = -EEXIST; - goto err; - } + if (hci_blacklist_lookup(hdev, bdaddr)) + return -EEXIST; entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL); - if (!entry) { - err = -ENOMEM; - goto err; - } + if (!entry) + return -ENOMEM; bacpy(&entry->bdaddr, bdaddr); list_add(&entry->list, &hdev->blacklist); - err = 0; - -err: - hci_dev_unlock_bh(hdev); - return err; + return mgmt_device_blocked(hdev, bdaddr); } int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct bdaddr_list *entry; - int err = 0; - hci_dev_lock_bh(hdev); - - if (bacmp(bdaddr, BDADDR_ANY) == 0) { - hci_blacklist_clear(hdev); - goto done; - } + if (bacmp(bdaddr, BDADDR_ANY) == 0) + return hci_blacklist_clear(hdev); entry = hci_blacklist_lookup(hdev, bdaddr); - if (!entry) { - err = -ENOENT; - goto done; - } + if (!entry) + return -ENOENT; list_del(&entry->list); kfree(entry); -done: - hci_dev_unlock_bh(hdev); - return err; + return mgmt_device_unblocked(hdev, bdaddr); } -static void hci_clear_adv_cache(unsigned long arg) +static void hci_clear_adv_cache(struct work_struct *work) { - struct hci_dev *hdev = (void *) arg; + struct hci_dev *hdev = container_of(work, struct hci_dev, + adv_work.work); hci_dev_lock(hdev); @@ -1443,7 +1445,7 @@ int hci_add_adv_entry(struct hci_dev *hdev, int hci_register_dev(struct hci_dev *hdev) { struct list_head *head = &hci_dev_list, *p; - int i, id = 0; + int i, id, error; BT_DBG("%p name %s bus %d owner %p", hdev, hdev->name, hdev->bus, hdev->owner); @@ -1451,7 +1453,12 @@ int hci_register_dev(struct hci_dev *hdev) if (!hdev->open || !hdev->close || !hdev->destruct) return -EINVAL; - write_lock_bh(&hci_dev_list_lock); + /* Do not allow HCI_AMP devices to register at index 0, + * so the index can be used as the AMP controller ID. + */ + id = (hdev->dev_type == HCI_BREDR) ? 0 : 1; + + write_lock(&hci_dev_list_lock); /* Find first available device id */ list_for_each(p, &hci_dev_list) { @@ -1462,12 +1469,13 @@ int hci_register_dev(struct hci_dev *hdev) sprintf(hdev->name, "hci%d", id); hdev->id = id; - list_add(&hdev->list, head); + list_add_tail(&hdev->list, head); atomic_set(&hdev->refcnt, 1); - spin_lock_init(&hdev->lock); + mutex_init(&hdev->lock); hdev->flags = 0; + hdev->dev_flags = 0; hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); @@ -1477,9 +1485,10 @@ int hci_register_dev(struct hci_dev *hdev) hdev->sniff_max_interval = 800; hdev->sniff_min_interval = 80; - tasklet_init(&hdev->cmd_task, hci_cmd_task, (unsigned long) hdev); - tasklet_init(&hdev->rx_task, hci_rx_task, (unsigned long) hdev); - tasklet_init(&hdev->tx_task, hci_tx_task, (unsigned long) hdev); + INIT_WORK(&hdev->rx_work, hci_rx_work); + INIT_WORK(&hdev->cmd_work, hci_cmd_work); + INIT_WORK(&hdev->tx_work, hci_tx_work); + skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); @@ -1497,6 +1506,8 @@ int hci_register_dev(struct hci_dev *hdev) hci_conn_hash_init(hdev); + INIT_LIST_HEAD(&hdev->mgmt_pending); + INIT_LIST_HEAD(&hdev->blacklist); INIT_LIST_HEAD(&hdev->uuids); @@ -1506,29 +1517,29 @@ int hci_register_dev(struct hci_dev *hdev) INIT_LIST_HEAD(&hdev->remote_oob_data); INIT_LIST_HEAD(&hdev->adv_entries); - setup_timer(&hdev->adv_timer, hci_clear_adv_cache, - (unsigned long) hdev); + INIT_DELAYED_WORK(&hdev->adv_work, hci_clear_adv_cache); INIT_WORK(&hdev->power_on, hci_power_on); - INIT_WORK(&hdev->power_off, hci_power_off); - setup_timer(&hdev->off_timer, hci_auto_off, (unsigned long) hdev); + INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); + + INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); atomic_set(&hdev->promisc, 0); - write_unlock_bh(&hci_dev_list_lock); + write_unlock(&hci_dev_list_lock); - hdev->workqueue = create_singlethread_workqueue(hdev->name); - if (!hdev->workqueue) - goto nomem; - - hdev->tfm = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hdev->tfm)) - BT_INFO("Failed to load transform for ecb(aes): %ld", - PTR_ERR(hdev->tfm)); + hdev->workqueue = alloc_workqueue(hdev->name, WQ_HIGHPRI | WQ_UNBOUND | + WQ_MEM_RECLAIM, 1); + if (!hdev->workqueue) { + error = -ENOMEM; + goto err; + } - hci_register_sysfs(hdev); + error = hci_add_sysfs(hdev); + if (error < 0) + goto err_wqueue; hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev, RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops, hdev); @@ -1541,31 +1552,33 @@ int hci_register_dev(struct hci_dev *hdev) set_bit(HCI_AUTO_OFF, &hdev->flags); set_bit(HCI_SETUP, &hdev->flags); - queue_work(hdev->workqueue, &hdev->power_on); + schedule_work(&hdev->power_on); hci_notify(hdev, HCI_DEV_REG); return id; -nomem: - write_lock_bh(&hci_dev_list_lock); +err_wqueue: + destroy_workqueue(hdev->workqueue); +err: + write_lock(&hci_dev_list_lock); list_del(&hdev->list); - write_unlock_bh(&hci_dev_list_lock); + write_unlock(&hci_dev_list_lock); - return -ENOMEM; + return error; } EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ -int hci_unregister_dev(struct hci_dev *hdev) +void hci_unregister_dev(struct hci_dev *hdev) { int i; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - write_lock_bh(&hci_dev_list_lock); + write_lock(&hci_dev_list_lock); list_del(&hdev->list); - write_unlock_bh(&hci_dev_list_lock); + write_unlock(&hci_dev_list_lock); hci_dev_do_close(hdev); @@ -1573,11 +1586,15 @@ int hci_unregister_dev(struct hci_dev *hdev) kfree_skb(hdev->reassembly[i]); if (!test_bit(HCI_INIT, &hdev->flags) && - !test_bit(HCI_SETUP, &hdev->flags)) - mgmt_index_removed(hdev->id); + !test_bit(HCI_SETUP, &hdev->flags)) { + hci_dev_lock(hdev); + mgmt_index_removed(hdev); + hci_dev_unlock(hdev); + } - if (!IS_ERR(hdev->tfm)) - crypto_free_blkcipher(hdev->tfm); + /* mgmt_index_removed should take care of emptying the + * pending list */ + BUG_ON(!list_empty(&hdev->mgmt_pending)); hci_notify(hdev, HCI_DEV_UNREG); @@ -1586,24 +1603,21 @@ int hci_unregister_dev(struct hci_dev *hdev) rfkill_destroy(hdev->rfkill); } - hci_unregister_sysfs(hdev); + hci_del_sysfs(hdev); - hci_del_off_timer(hdev); - del_timer(&hdev->adv_timer); + cancel_delayed_work_sync(&hdev->adv_work); destroy_workqueue(hdev->workqueue); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); hci_blacklist_clear(hdev); hci_uuids_clear(hdev); hci_link_keys_clear(hdev); hci_remote_oob_data_clear(hdev); hci_adv_entries_clear(hdev); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); __hci_dev_put(hdev); - - return 0; } EXPORT_SYMBOL(hci_unregister_dev); @@ -1639,9 +1653,8 @@ int hci_recv_frame(struct sk_buff *skb) /* Time stamp */ __net_timestamp(skb); - /* Queue frame for rx task */ skb_queue_tail(&hdev->rx_q, skb); - tasklet_schedule(&hdev->rx_task); + queue_work(hdev->workqueue, &hdev->rx_work); return 0; } @@ -1813,59 +1826,13 @@ EXPORT_SYMBOL(hci_recv_stream_fragment); /* ---- Interface to upper protocols ---- */ -/* Register/Unregister protocols. - * hci_task_lock is used to ensure that no tasks are running. */ -int hci_register_proto(struct hci_proto *hp) -{ - int err = 0; - - BT_DBG("%p name %s id %d", hp, hp->name, hp->id); - - if (hp->id >= HCI_MAX_PROTO) - return -EINVAL; - - write_lock_bh(&hci_task_lock); - - if (!hci_proto[hp->id]) - hci_proto[hp->id] = hp; - else - err = -EEXIST; - - write_unlock_bh(&hci_task_lock); - - return err; -} -EXPORT_SYMBOL(hci_register_proto); - -int hci_unregister_proto(struct hci_proto *hp) -{ - int err = 0; - - BT_DBG("%p name %s id %d", hp, hp->name, hp->id); - - if (hp->id >= HCI_MAX_PROTO) - return -EINVAL; - - write_lock_bh(&hci_task_lock); - - if (hci_proto[hp->id]) - hci_proto[hp->id] = NULL; - else - err = -ENOENT; - - write_unlock_bh(&hci_task_lock); - - return err; -} -EXPORT_SYMBOL(hci_unregister_proto); - int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - write_lock_bh(&hci_cb_list_lock); + write_lock(&hci_cb_list_lock); list_add(&cb->list, &hci_cb_list); - write_unlock_bh(&hci_cb_list_lock); + write_unlock(&hci_cb_list_lock); return 0; } @@ -1875,9 +1842,9 @@ int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - write_lock_bh(&hci_cb_list_lock); + write_lock(&hci_cb_list_lock); list_del(&cb->list); - write_unlock_bh(&hci_cb_list_lock); + write_unlock(&hci_cb_list_lock); return 0; } @@ -1938,7 +1905,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) hdev->init_last_cmd = opcode; skb_queue_tail(&hdev->cmd_q, skb); - tasklet_schedule(&hdev->cmd_task); + queue_work(hdev->workqueue, &hdev->cmd_work); return 0; } @@ -1974,23 +1941,18 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags) hdr->dlen = cpu_to_le16(len); } -void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) +static void hci_queue_acl(struct hci_conn *conn, struct sk_buff_head *queue, + struct sk_buff *skb, __u16 flags) { struct hci_dev *hdev = conn->hdev; struct sk_buff *list; - BT_DBG("%s conn %p flags 0x%x", hdev->name, conn, flags); - - skb->dev = (void *) hdev; - bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; - hci_add_acl_hdr(skb, conn->handle, flags); - list = skb_shinfo(skb)->frag_list; if (!list) { /* Non fragmented */ BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len); - skb_queue_tail(&conn->data_q, skb); + skb_queue_tail(queue, skb); } else { /* Fragmented */ BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); @@ -1998,9 +1960,9 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) skb_shinfo(skb)->frag_list = NULL; /* Queue all fragments atomically */ - spin_lock_bh(&conn->data_q.lock); + spin_lock(&queue->lock); - __skb_queue_tail(&conn->data_q, skb); + __skb_queue_tail(queue, skb); flags &= ~ACL_START; flags |= ACL_CONT; @@ -2013,13 +1975,27 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); - __skb_queue_tail(&conn->data_q, skb); + __skb_queue_tail(queue, skb); } while (list); - spin_unlock_bh(&conn->data_q.lock); + spin_unlock(&queue->lock); } +} + +void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags) +{ + struct hci_conn *conn = chan->conn; + struct hci_dev *hdev = conn->hdev; + + BT_DBG("%s chan %p flags 0x%x", hdev->name, chan, flags); + + skb->dev = (void *) hdev; + bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; + hci_add_acl_hdr(skb, conn->handle, flags); - tasklet_schedule(&hdev->tx_task); + hci_queue_acl(conn, &chan->data_q, skb, flags); + + queue_work(hdev->workqueue, &hdev->tx_work); } EXPORT_SYMBOL(hci_send_acl); @@ -2042,7 +2018,7 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; skb_queue_tail(&conn->data_q, skb); - tasklet_schedule(&hdev->tx_task); + queue_work(hdev->workqueue, &hdev->tx_work); } EXPORT_SYMBOL(hci_send_sco); @@ -2052,16 +2028,15 @@ EXPORT_SYMBOL(hci_send_sco); static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int *quote) { struct hci_conn_hash *h = &hdev->conn_hash; - struct hci_conn *conn = NULL; + struct hci_conn *conn = NULL, *c; int num = 0, min = ~0; - struct list_head *p; /* We don't have to lock device here. Connections are always * added and removed with TX task disabled. */ - list_for_each(p, &h->list) { - struct hci_conn *c; - c = list_entry(p, struct hci_conn, list); + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { if (c->type != type || skb_queue_empty(&c->data_q)) continue; @@ -2074,8 +2049,13 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int min = c->sent; conn = c; } + + if (hci_conn_num(hdev, type) == num) + break; } + rcu_read_unlock(); + if (conn) { int cnt, q; @@ -2107,30 +2087,165 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int static inline void hci_link_tx_to(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; - struct list_head *p; - struct hci_conn *c; + struct hci_conn *c; BT_ERR("%s link tx timeout", hdev->name); + rcu_read_lock(); + /* Kill stalled connections */ - list_for_each(p, &h->list) { - c = list_entry(p, struct hci_conn, list); + list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && c->sent) { BT_ERR("%s killing stalled connection %s", hdev->name, batostr(&c->dst)); hci_acl_disconn(c, 0x13); } } + + rcu_read_unlock(); } -static inline void hci_sched_acl(struct hci_dev *hdev) +static inline struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, + int *quote) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_chan *chan = NULL; + int num = 0, min = ~0, cur_prio = 0; + struct hci_conn *conn; + int cnt, q, conn_num = 0; + + BT_DBG("%s", hdev->name); + + rcu_read_lock(); + + list_for_each_entry_rcu(conn, &h->list, list) { + struct hci_chan *tmp; + + if (conn->type != type) + continue; + + if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) + continue; + + conn_num++; + + list_for_each_entry_rcu(tmp, &conn->chan_list, list) { + struct sk_buff *skb; + + if (skb_queue_empty(&tmp->data_q)) + continue; + + skb = skb_peek(&tmp->data_q); + if (skb->priority < cur_prio) + continue; + + if (skb->priority > cur_prio) { + num = 0; + min = ~0; + cur_prio = skb->priority; + } + + num++; + + if (conn->sent < min) { + min = conn->sent; + chan = tmp; + } + } + + if (hci_conn_num(hdev, type) == conn_num) + break; + } + + rcu_read_unlock(); + + if (!chan) + return NULL; + + switch (chan->conn->type) { + case ACL_LINK: + cnt = hdev->acl_cnt; + break; + case SCO_LINK: + case ESCO_LINK: + cnt = hdev->sco_cnt; + break; + case LE_LINK: + cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; + break; + default: + cnt = 0; + BT_ERR("Unknown link type"); + } + + q = cnt / num; + *quote = q ? q : 1; + BT_DBG("chan %p quote %d", chan, *quote); + return chan; +} + +static void hci_prio_recalculate(struct hci_dev *hdev, __u8 type) { + struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *conn; + int num = 0; + + BT_DBG("%s", hdev->name); + + rcu_read_lock(); + + list_for_each_entry_rcu(conn, &h->list, list) { + struct hci_chan *chan; + + if (conn->type != type) + continue; + + if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) + continue; + + num++; + + list_for_each_entry_rcu(chan, &conn->chan_list, list) { + struct sk_buff *skb; + + if (chan->sent) { + chan->sent = 0; + continue; + } + + if (skb_queue_empty(&chan->data_q)) + continue; + + skb = skb_peek(&chan->data_q); + if (skb->priority >= HCI_PRIO_MAX - 1) + continue; + + skb->priority = HCI_PRIO_MAX - 1; + + BT_DBG("chan %p skb %p promoted to %d", chan, skb, + skb->priority); + } + + if (hci_conn_num(hdev, type) == num) + break; + } + + rcu_read_unlock(); + +} + +static inline void hci_sched_acl(struct hci_dev *hdev) +{ + struct hci_chan *chan; struct sk_buff *skb; int quote; + unsigned int cnt; BT_DBG("%s", hdev->name); + if (!hci_conn_num(hdev, ACL_LINK)) + return; + if (!test_bit(HCI_RAW, &hdev->flags)) { /* ACL tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ @@ -2138,19 +2253,35 @@ static inline void hci_sched_acl(struct hci_dev *hdev) hci_link_tx_to(hdev, ACL_LINK); } - while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, "e))) { - while (quote-- && (skb = skb_dequeue(&conn->data_q))) { - BT_DBG("skb %p len %d", skb, skb->len); + cnt = hdev->acl_cnt; - hci_conn_enter_active_mode(conn, bt_cb(skb)->force_active); + while (hdev->acl_cnt && + (chan = hci_chan_sent(hdev, ACL_LINK, "e))) { + u32 priority = (skb_peek(&chan->data_q))->priority; + while (quote-- && (skb = skb_peek(&chan->data_q))) { + BT_DBG("chan %p skb %p len %d priority %u", chan, skb, + skb->len, skb->priority); + + /* Stop if priority has changed */ + if (skb->priority < priority) + break; + + skb = skb_dequeue(&chan->data_q); + + hci_conn_enter_active_mode(chan->conn, + bt_cb(skb)->force_active); hci_send_frame(skb); hdev->acl_last_tx = jiffies; hdev->acl_cnt--; - conn->sent++; + chan->sent++; + chan->conn->sent++; } } + + if (cnt != hdev->acl_cnt) + hci_prio_recalculate(hdev, ACL_LINK); } /* Schedule SCO */ @@ -2162,6 +2293,9 @@ static inline void hci_sched_sco(struct hci_dev *hdev) BT_DBG("%s", hdev->name); + if (!hci_conn_num(hdev, SCO_LINK)) + return; + while (hdev->sco_cnt && (conn = hci_low_sent(hdev, SCO_LINK, "e))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); @@ -2182,6 +2316,9 @@ static inline void hci_sched_esco(struct hci_dev *hdev) BT_DBG("%s", hdev->name); + if (!hci_conn_num(hdev, ESCO_LINK)) + return; + while (hdev->sco_cnt && (conn = hci_low_sent(hdev, ESCO_LINK, "e))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); @@ -2196,12 +2333,15 @@ static inline void hci_sched_esco(struct hci_dev *hdev) static inline void hci_sched_le(struct hci_dev *hdev) { - struct hci_conn *conn; + struct hci_chan *chan; struct sk_buff *skb; - int quote, cnt; + int quote, cnt, tmp; BT_DBG("%s", hdev->name); + if (!hci_conn_num(hdev, LE_LINK)) + return; + if (!test_bit(HCI_RAW, &hdev->flags)) { /* LE tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ @@ -2211,30 +2351,42 @@ static inline void hci_sched_le(struct hci_dev *hdev) } cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; - while (cnt && (conn = hci_low_sent(hdev, LE_LINK, "e))) { - while (quote-- && (skb = skb_dequeue(&conn->data_q))) { - BT_DBG("skb %p len %d", skb, skb->len); + tmp = cnt; + while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { + u32 priority = (skb_peek(&chan->data_q))->priority; + while (quote-- && (skb = skb_peek(&chan->data_q))) { + BT_DBG("chan %p skb %p len %d priority %u", chan, skb, + skb->len, skb->priority); + + /* Stop if priority has changed */ + if (skb->priority < priority) + break; + + skb = skb_dequeue(&chan->data_q); hci_send_frame(skb); hdev->le_last_tx = jiffies; cnt--; - conn->sent++; + chan->sent++; + chan->conn->sent++; } } + if (hdev->le_pkts) hdev->le_cnt = cnt; else hdev->acl_cnt = cnt; + + if (cnt != tmp) + hci_prio_recalculate(hdev, LE_LINK); } -static void hci_tx_task(unsigned long arg) +static void hci_tx_work(struct work_struct *work) { - struct hci_dev *hdev = (struct hci_dev *) arg; + struct hci_dev *hdev = container_of(work, struct hci_dev, tx_work); struct sk_buff *skb; - read_lock(&hci_task_lock); - BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt, hdev->le_cnt); @@ -2251,8 +2403,6 @@ static void hci_tx_task(unsigned long arg) /* Send next queued raw (unknown type) packet */ while ((skb = skb_dequeue(&hdev->raw_q))) hci_send_frame(skb); - - read_unlock(&hci_task_lock); } /* ----- HCI RX task (incoming data processing) ----- */ @@ -2279,16 +2429,11 @@ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); if (conn) { - register struct hci_proto *hp; - - hci_conn_enter_active_mode(conn, bt_cb(skb)->force_active); + hci_conn_enter_active_mode(conn, BT_POWER_FORCE_ACTIVE_OFF); /* Send to upper protocol */ - hp = hci_proto[HCI_PROTO_L2CAP]; - if (hp && hp->recv_acldata) { - hp->recv_acldata(conn, skb, flags); - return; - } + l2cap_recv_acldata(conn, skb, flags); + return; } else { BT_ERR("%s ACL packet for unknown connection handle %d", hdev->name, handle); @@ -2317,14 +2462,9 @@ static inline void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); if (conn) { - register struct hci_proto *hp; - /* Send to upper protocol */ - hp = hci_proto[HCI_PROTO_SCO]; - if (hp && hp->recv_scodata) { - hp->recv_scodata(conn, skb); - return; - } + sco_recv_scodata(conn, skb); + return; } else { BT_ERR("%s SCO packet for unknown connection handle %d", hdev->name, handle); @@ -2333,15 +2473,13 @@ static inline void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb); } -static void hci_rx_task(unsigned long arg) +static void hci_rx_work(struct work_struct *work) { - struct hci_dev *hdev = (struct hci_dev *) arg; + struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); struct sk_buff *skb; BT_DBG("%s", hdev->name); - read_lock(&hci_task_lock); - while ((skb = skb_dequeue(&hdev->rx_q))) { if (atomic_read(&hdev->promisc)) { /* Send copy to the sockets */ @@ -2366,6 +2504,7 @@ static void hci_rx_task(unsigned long arg) /* Process frame */ switch (bt_cb(skb)->pkt_type) { case HCI_EVENT_PKT: + BT_DBG("%s Event packet", hdev->name); hci_event_packet(hdev, skb); break; @@ -2384,13 +2523,11 @@ static void hci_rx_task(unsigned long arg) break; } } - - read_unlock(&hci_task_lock); } -static void hci_cmd_task(unsigned long arg) +static void hci_cmd_work(struct work_struct *work) { - struct hci_dev *hdev = (struct hci_dev *) arg; + struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work); struct sk_buff *skb; BT_DBG("%s cmd %d", hdev->name, atomic_read(&hdev->cmd_cnt)); @@ -2414,7 +2551,38 @@ static void hci_cmd_task(unsigned long arg) jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT)); } else { skb_queue_head(&hdev->cmd_q, skb); - tasklet_schedule(&hdev->cmd_task); + queue_work(hdev->workqueue, &hdev->cmd_work); } } } + +int hci_do_inquiry(struct hci_dev *hdev, u8 length) +{ + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + struct hci_cp_inquiry cp; + + BT_DBG("%s", hdev->name); + + if (test_bit(HCI_INQUIRY, &hdev->flags)) + return -EINPROGRESS; + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = length; + + return hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); +} + +int hci_cancel_inquiry(struct hci_dev *hdev) +{ + BT_DBG("%s", hdev->name); + + if (!test_bit(HCI_INQUIRY, &hdev->flags)) + return -EPERM; + + return hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL); +} + +module_param(enable_hs, bool, 0644); +MODULE_PARM_DESC(enable_hs, "Enable High Speed"); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7ef4eb4435fb..001307f81057 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -45,7 +45,7 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -static int enable_le; +static bool enable_le; /* Handle HCI Event packets */ @@ -55,12 +55,18 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); - if (status) + if (status) { + hci_dev_lock(hdev); + mgmt_stop_discovery_failed(hdev, status); + hci_dev_unlock(hdev); return; + } + + clear_bit(HCI_INQUIRY, &hdev->flags); - if (test_and_clear_bit(HCI_INQUIRY, &hdev->flags) && - test_bit(HCI_MGMT, &hdev->flags)) - mgmt_discovering(hdev->id, 0); + hci_dev_lock(hdev); + mgmt_discovering(hdev, 0); + hci_dev_unlock(hdev); hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); @@ -76,10 +82,6 @@ static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) if (status) return; - if (test_and_clear_bit(HCI_INQUIRY, &hdev->flags) && - test_bit(HCI_MGMT, &hdev->flags)) - mgmt_discovering(hdev->id, 0); - hci_conn_check_pending(hdev); } @@ -192,6 +194,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_RESET, &hdev->flags); hci_req_complete(hdev, HCI_OP_RESET, status); + + hdev->dev_flags = 0; } static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -205,13 +209,15 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) if (!sent) return; + hci_dev_lock(hdev); + if (test_bit(HCI_MGMT, &hdev->flags)) - mgmt_set_local_name_complete(hdev->id, sent, status); + mgmt_set_local_name_complete(hdev, sent, status); - if (status) - return; + if (status == 0) + memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); - memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); + hci_dev_unlock(hdev); } static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -274,7 +280,8 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) { - __u8 status = *((__u8 *) skb->data); + __u8 param, status = *((__u8 *) skb->data); + int old_pscan, old_iscan; void *sent; BT_DBG("%s status 0x%x", hdev->name, status); @@ -283,28 +290,40 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) if (!sent) return; - if (!status) { - __u8 param = *((__u8 *) sent); - int old_pscan, old_iscan; + param = *((__u8 *) sent); - old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags); - old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags); - - if (param & SCAN_INQUIRY) { - set_bit(HCI_ISCAN, &hdev->flags); - if (!old_iscan) - mgmt_discoverable(hdev->id, 1); - } else if (old_iscan) - mgmt_discoverable(hdev->id, 0); + hci_dev_lock(hdev); - if (param & SCAN_PAGE) { - set_bit(HCI_PSCAN, &hdev->flags); - if (!old_pscan) - mgmt_connectable(hdev->id, 1); - } else if (old_pscan) - mgmt_connectable(hdev->id, 0); + if (status != 0) { + mgmt_write_scan_failed(hdev, param, status); + hdev->discov_timeout = 0; + goto done; } + old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags); + old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags); + + if (param & SCAN_INQUIRY) { + set_bit(HCI_ISCAN, &hdev->flags); + if (!old_iscan) + mgmt_discoverable(hdev, 1); + if (hdev->discov_timeout > 0) { + int to = msecs_to_jiffies(hdev->discov_timeout * 1000); + queue_delayed_work(hdev->workqueue, &hdev->discov_off, + to); + } + } else if (old_iscan) + mgmt_discoverable(hdev, 0); + + if (param & SCAN_PAGE) { + set_bit(HCI_PSCAN, &hdev->flags); + if (!old_pscan) + mgmt_connectable(hdev, 1); + } else if (old_pscan) + mgmt_connectable(hdev, 0); + +done: + hci_dev_unlock(hdev); hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); } @@ -359,11 +378,8 @@ static void hci_cc_read_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s voice setting 0x%04x", hdev->name, setting); - if (hdev->notify) { - tasklet_disable(&hdev->tx_task); + if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); - tasklet_enable(&hdev->tx_task); - } } static void hci_cc_write_voice_setting(struct hci_dev *hdev, struct sk_buff *skb) @@ -390,11 +406,8 @@ static void hci_cc_write_voice_setting(struct hci_dev *hdev, struct sk_buff *skb BT_DBG("%s voice setting 0x%04x", hdev->name, setting); - if (hdev->notify) { - tasklet_disable(&hdev->tx_task); + if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); - tasklet_enable(&hdev->tx_task); - } } static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) @@ -481,7 +494,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev) /* CSR 1.1 dongles does not accept any bitfield so don't try to set * any event mask for pre 1.2 devices */ - if (hdev->lmp_ver <= 1) + if (hdev->hci_ver < BLUETOOTH_VER_1_2) return; events[4] |= 0x01; /* Flow Specification Complete */ @@ -543,9 +556,12 @@ static void hci_set_le_support(struct hci_dev *hdev) static void hci_setup(struct hci_dev *hdev) { + if (hdev->dev_type != HCI_BREDR) + return; + hci_setup_event_mask(hdev); - if (hdev->lmp_ver > 1) + if (hdev->hci_ver > BLUETOOTH_VER_1_1) hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (hdev->features[6] & LMP_SIMPLE_PAIR) { @@ -695,11 +711,33 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, if (rp->status) return; - memcpy(hdev->extfeatures, rp->features, 8); + switch (rp->page) { + case 0: + memcpy(hdev->features, rp->features, 8); + break; + case 1: + memcpy(hdev->host_features, rp->features, 8); + break; + } hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status); } +static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_flow_control_mode *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->flow_ctl_mode = rp->mode; + + hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status); +} + static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_read_buffer_size *rp = (void *) skb->data; @@ -739,6 +777,28 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status); } +static void hci_cc_read_data_block_size(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_data_block_size *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->block_mtu = __le16_to_cpu(rp->max_acl_len); + hdev->block_len = __le16_to_cpu(rp->block_len); + hdev->num_blocks = __le16_to_cpu(rp->num_blocks); + + hdev->block_cnt = hdev->num_blocks; + + BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, + hdev->block_cnt, hdev->block_len); + + hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status); +} + static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -748,6 +808,30 @@ static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); } +static void hci_cc_read_local_amp_info(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_local_amp_info *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->amp_status = rp->amp_status; + hdev->amp_total_bw = __le32_to_cpu(rp->total_bw); + hdev->amp_max_bw = __le32_to_cpu(rp->max_bw); + hdev->amp_min_latency = __le32_to_cpu(rp->min_latency); + hdev->amp_max_pdu = __le32_to_cpu(rp->max_pdu); + hdev->amp_type = rp->amp_type; + hdev->amp_pal_cap = __le16_to_cpu(rp->pal_cap); + hdev->amp_assoc_size = __le16_to_cpu(rp->max_assoc_size); + hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); + hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); + + hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status); +} + static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, struct sk_buff *skb) { @@ -804,19 +888,24 @@ static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, rp->status); + hci_dev_lock(hdev); + if (test_bit(HCI_MGMT, &hdev->flags)) - mgmt_pin_code_reply_complete(hdev->id, &rp->bdaddr, rp->status); + mgmt_pin_code_reply_complete(hdev, &rp->bdaddr, rp->status); if (rp->status != 0) - return; + goto unlock; cp = hci_sent_cmd_data(hdev, HCI_OP_PIN_CODE_REPLY); if (!cp) - return; + goto unlock; conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); if (conn) conn->pin_length = cp->pin_len; + +unlock: + hci_dev_unlock(hdev); } static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -825,10 +914,15 @@ static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, rp->status); + hci_dev_lock(hdev); + if (test_bit(HCI_MGMT, &hdev->flags)) - mgmt_pin_code_neg_reply_complete(hdev->id, &rp->bdaddr, + mgmt_pin_code_neg_reply_complete(hdev, &rp->bdaddr, rp->status); + + hci_dev_unlock(hdev); } + static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) { @@ -855,9 +949,13 @@ static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, rp->status); + hci_dev_lock(hdev); + if (test_bit(HCI_MGMT, &hdev->flags)) - mgmt_user_confirm_reply_complete(hdev->id, &rp->bdaddr, + mgmt_user_confirm_reply_complete(hdev, &rp->bdaddr, rp->status); + + hci_dev_unlock(hdev); } static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, @@ -867,9 +965,44 @@ static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, BT_DBG("%s status 0x%x", hdev->name, rp->status); + hci_dev_lock(hdev); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_neg_reply_complete(hdev, &rp->bdaddr, + rp->status); + + hci_dev_unlock(hdev); +} + +static void hci_cc_user_passkey_reply(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + hci_dev_lock(hdev); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_passkey_reply_complete(hdev, &rp->bdaddr, + rp->status); + + hci_dev_unlock(hdev); +} + +static void hci_cc_user_passkey_neg_reply(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + hci_dev_lock(hdev); + if (test_bit(HCI_MGMT, &hdev->flags)) - mgmt_user_confirm_neg_reply_complete(hdev->id, &rp->bdaddr, + mgmt_user_passkey_neg_reply_complete(hdev, &rp->bdaddr, rp->status); + + hci_dev_unlock(hdev); } static void hci_cc_read_local_oob_data_reply(struct hci_dev *hdev, @@ -879,8 +1012,17 @@ static void hci_cc_read_local_oob_data_reply(struct hci_dev *hdev, BT_DBG("%s status 0x%x", hdev->name, rp->status); - mgmt_read_local_oob_data_reply_complete(hdev->id, rp->hash, + hci_dev_lock(hdev); + mgmt_read_local_oob_data_reply_complete(hdev, rp->hash, rp->randomizer, rp->status); + hci_dev_unlock(hdev); +} + +static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); } static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, @@ -898,16 +1040,27 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, if (!cp) return; - hci_dev_lock(hdev); + switch (cp->enable) { + case LE_SCANNING_ENABLED: + set_bit(HCI_LE_SCAN, &hdev->dev_flags); - if (cp->enable == 0x01) { - del_timer(&hdev->adv_timer); + cancel_delayed_work_sync(&hdev->adv_work); + + hci_dev_lock(hdev); hci_adv_entries_clear(hdev); - } else if (cp->enable == 0x00) { - mod_timer(&hdev->adv_timer, jiffies + ADV_CLEAR_TIMEOUT); - } + hci_dev_unlock(hdev); + break; - hci_dev_unlock(hdev); + case LE_SCANNING_DISABLED: + clear_bit(HCI_LE_SCAN, &hdev->dev_flags); + + schedule_delayed_work(&hdev->adv_work, ADV_CLEAR_TIMEOUT); + break; + + default: + BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable); + break; + } } static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -956,12 +1109,18 @@ static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) if (status) { hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); + hci_dev_lock(hdev); + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_start_discovery_failed(hdev, status); + hci_dev_unlock(hdev); return; } - if (!test_and_set_bit(HCI_INQUIRY, &hdev->flags) && - test_bit(HCI_MGMT, &hdev->flags)) - mgmt_discovering(hdev->id, 1); + set_bit(HCI_INQUIRY, &hdev->flags); + + hci_dev_lock(hdev); + mgmt_discovering(hdev, 1); + hci_dev_unlock(hdev); } static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) @@ -1103,9 +1262,10 @@ static int hci_outgoing_auth_needed(struct hci_dev *hdev, return 0; /* Only request authentication for SSP connections or non-SSP - * devices with sec_level HIGH */ + * devices with sec_level HIGH or if MITM protection is requested */ if (!(hdev->ssp_mode > 0 && conn->ssp_mode > 0) && - conn->pending_sec_level != BT_SECURITY_HIGH) + conn->pending_sec_level != BT_SECURITY_HIGH && + !(conn->auth_type & 0x01)) return 0; return 1; @@ -1339,13 +1499,16 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff BT_DBG("%s status %d", hdev->name, status); - if (test_and_clear_bit(HCI_INQUIRY, &hdev->flags) && - test_bit(HCI_MGMT, &hdev->flags)) - mgmt_discovering(hdev->id, 0); - hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); + + if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) + return; + + hci_dev_lock(hdev); + mgmt_discovering(hdev, 0); + hci_dev_unlock(hdev); } static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1361,12 +1524,6 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff * hci_dev_lock(hdev); - if (!test_and_set_bit(HCI_INQUIRY, &hdev->flags)) { - - if (test_bit(HCI_MGMT, &hdev->flags)) - mgmt_discovering(hdev->id, 1); - } - for (; num_rsp; num_rsp--, info++) { bacpy(&data.bdaddr, &info->bdaddr); data.pscan_rep_mode = info->pscan_rep_mode; @@ -1377,8 +1534,8 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff * data.rssi = 0x00; data.ssp_mode = 0x00; hci_inquiry_cache_update(hdev, &data); - mgmt_device_found(hdev->id, &info->bdaddr, info->dev_class, 0, - NULL); + mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, + info->dev_class, 0, NULL); } hci_dev_unlock(hdev); @@ -1412,7 +1569,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s conn->state = BT_CONFIG; hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; - mgmt_connected(hdev->id, &ev->bdaddr); + mgmt_connected(hdev, &ev->bdaddr, conn->type, + conn->dst_type); } else conn->state = BT_CONNECTED; @@ -1434,7 +1592,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s } /* Set packet type for incoming connection */ - if (!conn->out && hdev->hci_ver < 3) { + if (!conn->out && hdev->hci_ver < BLUETOOTH_VER_2_0) { struct hci_cp_change_conn_ptype cp; cp.handle = ev->handle; cp.pkt_type = cpu_to_le16(conn->pkt_type); @@ -1444,7 +1602,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s } else { conn->state = BT_CLOSED; if (conn->type == ACL_LINK) - mgmt_connect_failed(hdev->id, &ev->bdaddr, ev->status); + mgmt_connect_failed(hdev, &ev->bdaddr, conn->type, + conn->dst_type, ev->status); } if (conn->type == ACL_LINK) @@ -1531,7 +1690,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk struct hci_cp_reject_conn_req cp; bacpy(&cp.bdaddr, &ev->bdaddr); - cp.reason = 0x0f; + cp.reason = HCI_ERROR_REJ_BAD_ADDR; hci_send_cmd(hdev, HCI_OP_REJECT_CONN_REQ, sizeof(cp), &cp); } } @@ -1543,24 +1702,27 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff BT_DBG("%s status %d", hdev->name, ev->status); - if (ev->status) { - mgmt_disconnect_failed(hdev->id); - return; - } - hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (!conn) goto unlock; - conn->state = BT_CLOSED; + if (ev->status == 0) + conn->state = BT_CLOSED; - if (conn->type == ACL_LINK || conn->type == LE_LINK) - mgmt_disconnected(hdev->id, &conn->dst); + if (conn->type == ACL_LINK || conn->type == LE_LINK) { + if (ev->status != 0) + mgmt_disconnect_failed(hdev, &conn->dst, ev->status); + else + mgmt_disconnected(hdev, &conn->dst, conn->type, + conn->dst_type); + } - hci_proto_disconn_cfm(conn, ev->reason); - hci_conn_del(conn); + if (ev->status == 0) { + hci_proto_disconn_cfm(conn, ev->reason); + hci_conn_del(conn); + } unlock: hci_dev_unlock(hdev); @@ -1588,7 +1750,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s conn->sec_level = conn->pending_sec_level; } } else { - mgmt_auth_failed(hdev->id, &conn->dst, ev->status); + mgmt_auth_failed(hdev, &conn->dst, ev->status); } clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); @@ -1643,7 +1805,7 @@ static inline void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb hci_dev_lock(hdev); if (ev->status == 0 && test_bit(HCI_MGMT, &hdev->flags)) - mgmt_remote_name(hdev->id, &ev->bdaddr, ev->name); + mgmt_remote_name(hdev, &ev->bdaddr, ev->name); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (!conn) @@ -1894,10 +2056,22 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_read_bd_addr(hdev, skb); break; + case HCI_OP_READ_DATA_BLOCK_SIZE: + hci_cc_read_data_block_size(hdev, skb); + break; + case HCI_OP_WRITE_CA_TIMEOUT: hci_cc_write_ca_timeout(hdev, skb); break; + case HCI_OP_READ_FLOW_CONTROL_MODE: + hci_cc_read_flow_control_mode(hdev, skb); + break; + + case HCI_OP_READ_LOCAL_AMP_INFO: + hci_cc_read_local_amp_info(hdev, skb); + break; + case HCI_OP_DELETE_STORED_LINK_KEY: hci_cc_delete_stored_link_key(hdev, skb); break; @@ -1942,6 +2116,17 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_user_confirm_neg_reply(hdev, skb); break; + case HCI_OP_USER_PASSKEY_REPLY: + hci_cc_user_passkey_reply(hdev, skb); + break; + + case HCI_OP_USER_PASSKEY_NEG_REPLY: + hci_cc_user_passkey_neg_reply(hdev, skb); + + case HCI_OP_LE_SET_SCAN_PARAM: + hci_cc_le_set_scan_param(hdev, skb); + break; + case HCI_OP_LE_SET_SCAN_ENABLE: hci_cc_le_set_scan_enable(hdev, skb); break; @@ -1969,7 +2154,7 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk if (ev->ncmd) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) - tasklet_schedule(&hdev->cmd_task); + queue_work(hdev->workqueue, &hdev->cmd_work); } } @@ -2029,7 +2214,7 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) case HCI_OP_DISCONNECT: if (ev->status != 0) - mgmt_disconnect_failed(hdev->id); + mgmt_disconnect_failed(hdev, NULL, ev->status); break; case HCI_OP_LE_CREATE_CONN: @@ -2051,7 +2236,7 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) - tasklet_schedule(&hdev->cmd_task); + queue_work(hdev->workqueue, &hdev->cmd_work); } } @@ -2084,56 +2269,67 @@ static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_num_comp_pkts *ev = (void *) skb->data; - __le16 *ptr; int i; - skb_pull(skb, sizeof(*ev)); - - BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl); + if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) { + BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode); + return; + } - if (skb->len < ev->num_hndl * 4) { + if (skb->len < sizeof(*ev) || skb->len < sizeof(*ev) + + ev->num_hndl * sizeof(struct hci_comp_pkts_info)) { BT_DBG("%s bad parameters", hdev->name); return; } - tasklet_disable(&hdev->tx_task); + BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl); - for (i = 0, ptr = (__le16 *) skb->data; i < ev->num_hndl; i++) { + for (i = 0; i < ev->num_hndl; i++) { + struct hci_comp_pkts_info *info = &ev->handles[i]; struct hci_conn *conn; __u16 handle, count; - handle = get_unaligned_le16(ptr++); - count = get_unaligned_le16(ptr++); + handle = __le16_to_cpu(info->handle); + count = __le16_to_cpu(info->count); conn = hci_conn_hash_lookup_handle(hdev, handle); - if (conn) { - conn->sent -= count; - - if (conn->type == ACL_LINK) { + if (!conn) + continue; + + conn->sent -= count; + + switch (conn->type) { + case ACL_LINK: + hdev->acl_cnt += count; + if (hdev->acl_cnt > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; + break; + + case LE_LINK: + if (hdev->le_pkts) { + hdev->le_cnt += count; + if (hdev->le_cnt > hdev->le_pkts) + hdev->le_cnt = hdev->le_pkts; + } else { hdev->acl_cnt += count; if (hdev->acl_cnt > hdev->acl_pkts) hdev->acl_cnt = hdev->acl_pkts; - } else if (conn->type == LE_LINK) { - if (hdev->le_pkts) { - hdev->le_cnt += count; - if (hdev->le_cnt > hdev->le_pkts) - hdev->le_cnt = hdev->le_pkts; - } else { - hdev->acl_cnt += count; - if (hdev->acl_cnt > hdev->acl_pkts) - hdev->acl_cnt = hdev->acl_pkts; - } - } else { - hdev->sco_cnt += count; - if (hdev->sco_cnt > hdev->sco_pkts) - hdev->sco_cnt = hdev->sco_pkts; } + break; + + case SCO_LINK: + hdev->sco_cnt += count; + if (hdev->sco_cnt > hdev->sco_pkts) + hdev->sco_cnt = hdev->sco_pkts; + break; + + default: + BT_ERR("Unknown type %d conn %p", conn->type, conn); + break; } } - tasklet_schedule(&hdev->tx_task); - - tasklet_enable(&hdev->tx_task); + queue_work(hdev->workqueue, &hdev->tx_work); } static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -2174,7 +2370,10 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (conn && conn->state == BT_CONNECTED) { + if (!conn) + goto unlock; + + if (conn->state == BT_CONNECTED) { hci_conn_hold(conn); conn->disc_timeout = HCI_PAIRING_TIMEOUT; hci_conn_put(conn); @@ -2191,9 +2390,10 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff else secure = 0; - mgmt_pin_code_request(hdev->id, &ev->bdaddr, secure); + mgmt_pin_code_request(hdev, &ev->bdaddr, secure); } +unlock: hci_dev_unlock(hdev); } @@ -2359,12 +2559,6 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct hci_dev_lock(hdev); - if (!test_and_set_bit(HCI_INQUIRY, &hdev->flags)) { - - if (test_bit(HCI_MGMT, &hdev->flags)) - mgmt_discovering(hdev->id, 1); - } - if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { struct inquiry_info_with_rssi_and_pscan_mode *info; info = (void *) (skb->data + 1); @@ -2379,7 +2573,7 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct data.rssi = info->rssi; data.ssp_mode = 0x00; hci_inquiry_cache_update(hdev, &data); - mgmt_device_found(hdev->id, &info->bdaddr, + mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, NULL); } @@ -2396,7 +2590,7 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct data.rssi = info->rssi; data.ssp_mode = 0x00; hci_inquiry_cache_update(hdev, &data); - mgmt_device_found(hdev->id, &info->bdaddr, + mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, NULL); } @@ -2527,12 +2721,6 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct if (!num_rsp) return; - if (!test_and_set_bit(HCI_INQUIRY, &hdev->flags)) { - - if (test_bit(HCI_MGMT, &hdev->flags)) - mgmt_discovering(hdev->id, 1); - } - hci_dev_lock(hdev); for (; num_rsp; num_rsp--, info++) { @@ -2545,8 +2733,8 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct data.rssi = info->rssi; data.ssp_mode = 0x01; hci_inquiry_cache_update(hdev, &data); - mgmt_device_found(hdev->id, &info->bdaddr, info->dev_class, - info->rssi, info->data); + mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, + info->dev_class, info->rssi, info->data); } hci_dev_unlock(hdev); @@ -2610,7 +2798,7 @@ static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff struct hci_cp_io_capability_neg_reply cp; bacpy(&cp.bdaddr, &ev->bdaddr); - cp.reason = 0x18; /* Pairing not allowed */ + cp.reason = HCI_ERROR_PAIRING_NOT_ALLOWED; hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_NEG_REPLY, sizeof(cp), &cp); @@ -2702,13 +2890,28 @@ static inline void hci_user_confirm_request_evt(struct hci_dev *hdev, } confirm: - mgmt_user_confirm_request(hdev->id, &ev->bdaddr, ev->passkey, + mgmt_user_confirm_request(hdev, &ev->bdaddr, ev->passkey, confirm_hint); unlock: hci_dev_unlock(hdev); } +static inline void hci_user_passkey_request_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_user_passkey_req *ev = (void *) skb->data; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_passkey_request(hdev, &ev->bdaddr); + + hci_dev_unlock(hdev); +} + static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_simple_pair_complete *ev = (void *) skb->data; @@ -2728,7 +2931,7 @@ static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_ * event gets always produced as initiator and is also mapped to * the mgmt_auth_failed event */ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->pend) && ev->status != 0) - mgmt_auth_failed(hdev->id, &conn->dst, ev->status); + mgmt_auth_failed(hdev, &conn->dst, ev->status); hci_conn_put(conn); @@ -2809,14 +3012,15 @@ static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff } if (ev->status) { - mgmt_connect_failed(hdev->id, &ev->bdaddr, ev->status); + mgmt_connect_failed(hdev, &ev->bdaddr, conn->type, + conn->dst_type, ev->status); hci_proto_connect_cfm(conn, ev->status); conn->state = BT_CLOSED; hci_conn_del(conn); goto unlock; } - mgmt_connected(hdev->id, &ev->bdaddr); + mgmt_connected(hdev, &ev->bdaddr, conn->type, conn->dst_type); conn->sec_level = BT_SECURITY_LOW; conn->handle = __le16_to_cpu(ev->handle); @@ -2834,19 +3038,17 @@ unlock: static inline void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) { - struct hci_ev_le_advertising_info *ev; - u8 num_reports; - - num_reports = skb->data[0]; - ev = (void *) &skb->data[1]; + u8 num_reports = skb->data[0]; + void *ptr = &skb->data[1]; hci_dev_lock(hdev); - hci_add_adv_entry(hdev, ev); + while (num_reports--) { + struct hci_ev_le_advertising_info *ev = ptr; - while (--num_reports) { - ev = (void *) (ev->data + ev->length + 1); hci_add_adv_entry(hdev, ev); + + ptr += sizeof(*ev) + ev->length + 1; } hci_dev_unlock(hdev); @@ -3049,6 +3251,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_user_confirm_request_evt(hdev, skb); break; + case HCI_EV_USER_PASSKEY_REQUEST: + hci_user_passkey_request_evt(hdev, skb); + break; + case HCI_EV_SIMPLE_PAIR_COMPLETE: hci_simple_pair_complete_evt(hdev, skb); break; @@ -3102,5 +3308,5 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) kfree_skb(skb); } -module_param(enable_le, bool, 0444); +module_param(enable_le, bool, 0644); MODULE_PARM_DESC(enable_le, "Enable LE support"); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index ff02cf5e77cc..0dcc96266779 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -49,7 +49,7 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> -static int enable_mgmt; +static bool enable_mgmt; /* ----- HCI socket interface ----- */ @@ -183,21 +183,35 @@ static int hci_sock_release(struct socket *sock) static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg) { bdaddr_t bdaddr; + int err; if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) return -EFAULT; - return hci_blacklist_add(hdev, &bdaddr); + hci_dev_lock(hdev); + + err = hci_blacklist_add(hdev, &bdaddr); + + hci_dev_unlock(hdev); + + return err; } static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg) { bdaddr_t bdaddr; + int err; if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) return -EFAULT; - return hci_blacklist_del(hdev, &bdaddr); + hci_dev_lock(hdev); + + err = hci_blacklist_del(hdev, &bdaddr); + + hci_dev_unlock(hdev); + + return err; } /* Ioctls that require bound socket */ @@ -329,8 +343,11 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le if (haddr.hci_channel > HCI_CHANNEL_CONTROL) return -EINVAL; - if (haddr.hci_channel == HCI_CHANNEL_CONTROL && !enable_mgmt) - return -EINVAL; + if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + if (!enable_mgmt) + return -EINVAL; + set_bit(HCI_PI_MGMT_INIT, &hci_pi(sk)->flags); + } lock_sock(sk); @@ -521,10 +538,10 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, if (test_bit(HCI_RAW, &hdev->flags) || (ogf == 0x3f)) { skb_queue_tail(&hdev->raw_q, skb); - tasklet_schedule(&hdev->tx_task); + queue_work(hdev->workqueue, &hdev->tx_work); } else { skb_queue_tail(&hdev->cmd_q, skb); - tasklet_schedule(&hdev->cmd_task); + queue_work(hdev->workqueue, &hdev->cmd_work); } } else { if (!capable(CAP_NET_RAW)) { @@ -533,7 +550,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } skb_queue_tail(&hdev->raw_q, skb); - tasklet_schedule(&hdev->tx_task); + queue_work(hdev->workqueue, &hdev->tx_work); } err = len; @@ -750,7 +767,6 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, /* Detach sockets from device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, node, &hci_sk_list.head) { - local_bh_disable(); bh_lock_sock_nested(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; @@ -761,7 +777,6 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, hci_dev_put(hdev); } bh_unlock_sock(sk); - local_bh_enable(); } read_unlock(&hci_sk_list.lock); } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index a6c3aa8be1f7..521095614235 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -5,6 +5,7 @@ #include <linux/init.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/module.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -23,6 +24,8 @@ static inline char *link_typetostr(int type) return "SCO"; case ESCO_LINK: return "eSCO"; + case LE_LINK: + return "LE"; default: return "UNKNOWN"; } @@ -86,11 +89,35 @@ static struct device_type bt_link = { .release = bt_link_release, }; -static void add_conn(struct work_struct *work) +/* + * The rfcomm tty device will possibly retain even when conn + * is down, and sysfs doesn't support move zombie device, + * so we should move the device before conn device is destroyed. + */ +static int __match_tty(struct device *dev, void *data) +{ + return !strncmp(dev_name(dev), "rfcomm", 6); +} + +void hci_conn_init_sysfs(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p", conn); + + conn->dev.type = &bt_link; + conn->dev.class = bt_class; + conn->dev.parent = &hdev->dev; + + device_initialize(&conn->dev); +} + +void hci_conn_add_sysfs(struct hci_conn *conn) { - struct hci_conn *conn = container_of(work, struct hci_conn, work_add); struct hci_dev *hdev = conn->hdev; + BT_DBG("conn %p", conn); + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); dev_set_drvdata(&conn->dev, conn); @@ -103,19 +130,8 @@ static void add_conn(struct work_struct *work) hci_dev_hold(hdev); } -/* - * The rfcomm tty device will possibly retain even when conn - * is down, and sysfs doesn't support move zombie device, - * so we should move the device before conn device is destroyed. - */ -static int __match_tty(struct device *dev, void *data) -{ - return !strncmp(dev_name(dev), "rfcomm", 6); -} - -static void del_conn(struct work_struct *work) +void hci_conn_del_sysfs(struct hci_conn *conn) { - struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; if (!device_is_registered(&conn->dev)) @@ -137,36 +153,6 @@ static void del_conn(struct work_struct *work) hci_dev_put(hdev); } -void hci_conn_init_sysfs(struct hci_conn *conn) -{ - struct hci_dev *hdev = conn->hdev; - - BT_DBG("conn %p", conn); - - conn->dev.type = &bt_link; - conn->dev.class = bt_class; - conn->dev.parent = &hdev->dev; - - device_initialize(&conn->dev); - - INIT_WORK(&conn->work_add, add_conn); - INIT_WORK(&conn->work_del, del_conn); -} - -void hci_conn_add_sysfs(struct hci_conn *conn) -{ - BT_DBG("conn %p", conn); - - queue_work(conn->hdev->workqueue, &conn->work_add); -} - -void hci_conn_del_sysfs(struct hci_conn *conn) -{ - BT_DBG("conn %p", conn); - - queue_work(conn->hdev->workqueue, &conn->work_del); -} - static inline char *host_bustostr(int bus) { switch (bus) { @@ -400,7 +386,7 @@ static int inquiry_cache_show(struct seq_file *f, void *p) struct inquiry_cache *cache = &hdev->inq_cache; struct inquiry_entry *e; - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); for (e = cache->list; e; e = e->next) { struct inquiry_data *data = &e->data; @@ -413,7 +399,7 @@ static int inquiry_cache_show(struct seq_file *f, void *p) data->rssi, data->ssp_mode, e->timestamp); } - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); return 0; } @@ -433,19 +419,14 @@ static const struct file_operations inquiry_cache_fops = { static int blacklist_show(struct seq_file *f, void *p) { struct hci_dev *hdev = f->private; - struct list_head *l; + struct bdaddr_list *b; - hci_dev_lock_bh(hdev); - - list_for_each(l, &hdev->blacklist) { - struct bdaddr_list *b; - - b = list_entry(l, struct bdaddr_list, list); + hci_dev_lock(hdev); + list_for_each_entry(b, &hdev->blacklist, list) seq_printf(f, "%s\n", batostr(&b->bdaddr)); - } - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); return 0; } @@ -482,19 +463,14 @@ static void print_bt_uuid(struct seq_file *f, u8 *uuid) static int uuids_show(struct seq_file *f, void *p) { struct hci_dev *hdev = f->private; - struct list_head *l; - - hci_dev_lock_bh(hdev); - - list_for_each(l, &hdev->uuids) { - struct bt_uuid *uuid; + struct bt_uuid *uuid; - uuid = list_entry(l, struct bt_uuid, list); + hci_dev_lock(hdev); + list_for_each_entry(uuid, &hdev->uuids, list) print_bt_uuid(f, uuid->uuid); - } - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); return 0; } @@ -515,11 +491,11 @@ static int auto_accept_delay_set(void *data, u64 val) { struct hci_dev *hdev = data; - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); hdev->auto_accept_delay = val; - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); return 0; } @@ -528,11 +504,11 @@ static int auto_accept_delay_get(void *data, u64 *val) { struct hci_dev *hdev = data; - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); *val = hdev->auto_accept_delay; - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); return 0; } @@ -540,22 +516,28 @@ static int auto_accept_delay_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, auto_accept_delay_set, "%llu\n"); -int hci_register_sysfs(struct hci_dev *hdev) +void hci_init_sysfs(struct hci_dev *hdev) +{ + struct device *dev = &hdev->dev; + + dev->type = &bt_host; + dev->class = bt_class; + + dev_set_drvdata(dev, hdev); + device_initialize(dev); +} + +int hci_add_sysfs(struct hci_dev *hdev) { struct device *dev = &hdev->dev; int err; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - dev->type = &bt_host; - dev->class = bt_class; dev->parent = hdev->parent; - dev_set_name(dev, "%s", hdev->name); - dev_set_drvdata(dev, hdev); - - err = device_register(dev); + err = device_add(dev); if (err < 0) return err; @@ -579,7 +561,7 @@ int hci_register_sysfs(struct hci_dev *hdev) return 0; } -void hci_unregister_sysfs(struct hci_dev *hdev) +void hci_del_sysfs(struct hci_dev *hdev) { BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig index 86a91543172a..4deaca78e91e 100644 --- a/net/bluetooth/hidp/Kconfig +++ b/net/bluetooth/hidp/Kconfig @@ -1,6 +1,6 @@ config BT_HIDP tristate "HIDP protocol support" - depends on BT && BT_L2CAP && INPUT && HID_SUPPORT + depends on BT && INPUT && HID_SUPPORT select HID help HIDP (Human Interface Device Protocol) is a transport layer diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index fb68f344c34a..d478be11d562 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -81,24 +81,20 @@ static unsigned char hidp_mkeyspat[] = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 }; static struct hidp_session *__hidp_get_session(bdaddr_t *bdaddr) { struct hidp_session *session; - struct list_head *p; BT_DBG(""); - list_for_each(p, &hidp_session_list) { - session = list_entry(p, struct hidp_session, list); + list_for_each_entry(session, &hidp_session_list, list) { if (!bacmp(bdaddr, &session->bdaddr)) return session; } + return NULL; } static void __hidp_link_session(struct hidp_session *session) { - __module_get(THIS_MODULE); list_add(&session->list, &hidp_session_list); - - hci_conn_hold_device(session->conn); } static void __hidp_unlink_session(struct hidp_session *session) @@ -106,7 +102,6 @@ static void __hidp_unlink_session(struct hidp_session *session) hci_conn_put_device(session->conn); list_del(&session->list); - module_put(THIS_MODULE); } static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci) @@ -255,6 +250,9 @@ static int __hidp_send_ctrl_message(struct hidp_session *session, BT_DBG("session %p data %p size %d", session, data, size); + if (atomic_read(&session->terminate)) + return -EIO; + skb = alloc_skb(size + 1, GFP_ATOMIC); if (!skb) { BT_ERR("Can't allocate memory for new frame"); @@ -329,6 +327,7 @@ static int hidp_get_raw_report(struct hid_device *hid, struct sk_buff *skb; size_t len; int numbered_reports = hid->report_enum[report_type].numbered; + int ret; switch (report_type) { case HID_FEATURE_REPORT: @@ -352,8 +351,9 @@ static int hidp_get_raw_report(struct hid_device *hid, session->waiting_report_number = numbered_reports ? report_number : -1; set_bit(HIDP_WAITING_FOR_RETURN, &session->flags); data[0] = report_number; - if (hidp_send_ctrl_message(hid->driver_data, report_type, data, 1)) - goto err_eio; + ret = hidp_send_ctrl_message(hid->driver_data, report_type, data, 1); + if (ret) + goto err; /* Wait for the return of the report. The returned report gets put in session->report_return. */ @@ -365,11 +365,13 @@ static int hidp_get_raw_report(struct hid_device *hid, 5*HZ); if (res == 0) { /* timeout */ - goto err_eio; + ret = -EIO; + goto err; } if (res < 0) { /* signal */ - goto err_restartsys; + ret = -ERESTARTSYS; + goto err; } } @@ -390,14 +392,10 @@ static int hidp_get_raw_report(struct hid_device *hid, return len; -err_restartsys: - clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); - mutex_unlock(&session->report_mutex); - return -ERESTARTSYS; -err_eio: +err: clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); mutex_unlock(&session->report_mutex); - return -EIO; + return ret; } static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count, @@ -422,11 +420,10 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, s /* Set up our wait, and send the report request to the device. */ set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); - if (hidp_send_ctrl_message(hid->driver_data, report_type, - data, count)) { - ret = -ENOMEM; + ret = hidp_send_ctrl_message(hid->driver_data, report_type, data, + count); + if (ret) goto err; - } /* Wait for the ACK from the device. */ while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) { @@ -496,10 +493,9 @@ static void hidp_process_handshake(struct hidp_session *session, case HIDP_HSHK_ERR_INVALID_REPORT_ID: case HIDP_HSHK_ERR_UNSUPPORTED_REQUEST: case HIDP_HSHK_ERR_INVALID_PARAMETER: - if (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) { - clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); + if (test_and_clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) wake_up_interruptible(&session->report_queue); - } + /* FIXME: Call into SET_ GET_ handlers here */ break; @@ -520,10 +516,8 @@ static void hidp_process_handshake(struct hidp_session *session, } /* Wake up the waiting thread. */ - if (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) { - clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); + if (test_and_clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) wake_up_interruptible(&session->report_queue); - } } static void hidp_process_hid_control(struct hidp_session *session, @@ -663,25 +657,32 @@ static int hidp_send_frame(struct socket *sock, unsigned char *data, int len) return kernel_sendmsg(sock, &msg, &iv, 1, len); } -static void hidp_process_transmit(struct hidp_session *session) +static void hidp_process_intr_transmit(struct hidp_session *session) { struct sk_buff *skb; BT_DBG("session %p", session); - while ((skb = skb_dequeue(&session->ctrl_transmit))) { - if (hidp_send_frame(session->ctrl_sock, skb->data, skb->len) < 0) { - skb_queue_head(&session->ctrl_transmit, skb); + while ((skb = skb_dequeue(&session->intr_transmit))) { + if (hidp_send_frame(session->intr_sock, skb->data, skb->len) < 0) { + skb_queue_head(&session->intr_transmit, skb); break; } hidp_set_timer(session); kfree_skb(skb); } +} - while ((skb = skb_dequeue(&session->intr_transmit))) { - if (hidp_send_frame(session->intr_sock, skb->data, skb->len) < 0) { - skb_queue_head(&session->intr_transmit, skb); +static void hidp_process_ctrl_transmit(struct hidp_session *session) +{ + struct sk_buff *skb; + + BT_DBG("session %p", session); + + while ((skb = skb_dequeue(&session->ctrl_transmit))) { + if (hidp_send_frame(session->ctrl_sock, skb->data, skb->len) < 0) { + skb_queue_head(&session->ctrl_transmit, skb); break; } @@ -700,6 +701,7 @@ static int hidp_session(void *arg) BT_DBG("session %p", session); + __module_get(THIS_MODULE); set_user_nice(current, -15); init_waitqueue_entry(&ctrl_wait, current); @@ -714,17 +716,25 @@ static int hidp_session(void *arg) intr_sk->sk_state != BT_CONNECTED) break; - while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) { + while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) { skb_orphan(skb); - hidp_recv_ctrl_frame(session, skb); + if (!skb_linearize(skb)) + hidp_recv_intr_frame(session, skb); + else + kfree_skb(skb); } - while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) { + hidp_process_intr_transmit(session); + + while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) { skb_orphan(skb); - hidp_recv_intr_frame(session, skb); + if (!skb_linearize(skb)) + hidp_recv_ctrl_frame(session, skb); + else + kfree_skb(skb); } - hidp_process_transmit(session); + hidp_process_ctrl_transmit(session); schedule(); set_current_state(TASK_INTERRUPTIBLE); @@ -733,6 +743,10 @@ static int hidp_session(void *arg) remove_wait_queue(sk_sleep(intr_sk), &intr_wait); remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); + clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); + clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); + wake_up_interruptible(&session->report_queue); + down_write(&hidp_session_sem); hidp_del_timer(session); @@ -766,34 +780,37 @@ static int hidp_session(void *arg) kfree(session->rd_data); kfree(session); + module_put_and_exit(0); return 0; } -static struct device *hidp_get_device(struct hidp_session *session) +static struct hci_conn *hidp_get_connection(struct hidp_session *session) { bdaddr_t *src = &bt_sk(session->ctrl_sock->sk)->src; bdaddr_t *dst = &bt_sk(session->ctrl_sock->sk)->dst; - struct device *device = NULL; + struct hci_conn *conn; struct hci_dev *hdev; hdev = hci_get_route(dst, src); if (!hdev) return NULL; - session->conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); - if (session->conn) - device = &session->conn->dev; + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); + if (conn) + hci_conn_hold_device(conn); + hci_dev_unlock(hdev); hci_dev_put(hdev); - return device; + return conn; } static int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) { struct input_dev *input; - int err, i; + int i; input = input_allocate_device(); if (!input) @@ -836,17 +853,10 @@ static int hidp_setup_input(struct hidp_session *session, input->relbit[0] |= BIT_MASK(REL_WHEEL); } - input->dev.parent = hidp_get_device(session); + input->dev.parent = &session->conn->dev; input->event = hidp_input_event; - err = input_register_device(input); - if (err < 0) { - input_free_device(input); - session->input = NULL; - return err; - } - return 0; } @@ -872,6 +882,9 @@ static int hidp_start(struct hid_device *hid) struct hidp_session *session = hid->driver_data; struct hid_report *report; + if (hid->quirks & HID_QUIRK_NO_INIT_REPORTS) + return 0; + list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT]. report_list, list) hidp_send_report(session, report); @@ -940,7 +953,7 @@ static int hidp_setup_hid(struct hidp_session *session, strncpy(hid->phys, batostr(&bt_sk(session->ctrl_sock->sk)->src), 64); strncpy(hid->uniq, batostr(&bt_sk(session->ctrl_sock->sk)->dst), 64); - hid->dev.parent = hidp_get_device(session); + hid->dev.parent = &session->conn->dev; hid->ll_driver = &hidp_hid_driver; hid->hid_get_raw_report = hidp_get_raw_report; @@ -967,18 +980,20 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst)) return -ENOTUNIQ; - session = kzalloc(sizeof(struct hidp_session), GFP_KERNEL); - if (!session) - return -ENOMEM; - BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size); down_write(&hidp_session_sem); s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst); if (s && s->state == BT_CONNECTED) { - err = -EEXIST; - goto failed; + up_write(&hidp_session_sem); + return -EEXIST; + } + + session = kzalloc(sizeof(struct hidp_session), GFP_KERNEL); + if (!session) { + up_write(&hidp_session_sem); + return -ENOMEM; } bacpy(&session->bdaddr, &bt_sk(ctrl_sock->sk)->dst); @@ -994,6 +1009,12 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, session->intr_sock = intr_sock; session->state = BT_CONNECTED; + session->conn = hidp_get_connection(session); + if (!session->conn) { + err = -ENOTCONN; + goto failed; + } + setup_timer(&session->timer, hidp_idle_timeout, (unsigned long)session); skb_queue_head_init(&session->ctrl_transmit); @@ -1006,9 +1027,11 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); session->idle_to = req->idle_to; + __hidp_link_session(session); + if (req->rd_size > 0) { err = hidp_setup_hid(session, req); - if (err && err != -ENODEV) + if (err) goto purge; } @@ -1018,8 +1041,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, goto purge; } - __hidp_link_session(session); - hidp_set_timer(session); if (session->hid) { @@ -1045,7 +1066,11 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, !session->waiting_for_startup); } - err = hid_add_device(session->hid); + if (session->hid) + err = hid_add_device(session->hid); + else + err = input_register_device(session->input); + if (err < 0) { atomic_inc(&session->terminate); wake_up_process(session->task); @@ -1068,8 +1093,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, unlink: hidp_del_timer(session); - __hidp_unlink_session(session); - if (session->input) { input_unregister_device(session->input); session->input = NULL; @@ -1084,6 +1107,8 @@ unlink: session->rd_data = NULL; purge: + __hidp_unlink_session(session); + skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); @@ -1125,19 +1150,16 @@ int hidp_del_connection(struct hidp_conndel_req *req) int hidp_get_connlist(struct hidp_connlist_req *req) { - struct list_head *p; + struct hidp_session *session; int err = 0, n = 0; BT_DBG(""); down_read(&hidp_session_sem); - list_for_each(p, &hidp_session_list) { - struct hidp_session *session; + list_for_each_entry(session, &hidp_session_list, list) { struct hidp_conninfo ci; - session = list_entry(p, struct hidp_session, list); - __hidp_copy_session(session, &ci); if (copy_to_user(req->ci, &ci, sizeof(ci))) { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b3bdb482bbe6..faf0b11ac1d3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3,6 +3,7 @@ Copyright (C) 2000-2001 Qualcomm Incorporated Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org> Copyright (C) 2010 Google Inc. + Copyright (C) 2011 ProFUSION Embedded Systems Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -56,10 +57,10 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/smp.h> -int disable_ertm; +bool disable_ertm; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; -static u8 l2cap_fixed_chan[8] = { 0x02, }; +static u8 l2cap_fixed_chan[8] = { L2CAP_FC_L2CAP, }; static LIST_HEAD(chan_list); static DEFINE_RWLOCK(chan_list_lock); @@ -76,38 +77,38 @@ static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); /* ---- L2CAP channels ---- */ -static inline void chan_hold(struct l2cap_chan *c) -{ - atomic_inc(&c->refcnt); -} - -static inline void chan_put(struct l2cap_chan *c) -{ - if (atomic_dec_and_test(&c->refcnt)) - kfree(c); -} - static struct l2cap_chan *__l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) { - struct l2cap_chan *c; + struct l2cap_chan *c, *r = NULL; - list_for_each_entry(c, &conn->chan_l, list) { - if (c->dcid == cid) - return c; + rcu_read_lock(); + + list_for_each_entry_rcu(c, &conn->chan_l, list) { + if (c->dcid == cid) { + r = c; + break; + } } - return NULL; + rcu_read_unlock(); + return r; } static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) { - struct l2cap_chan *c; + struct l2cap_chan *c, *r = NULL; - list_for_each_entry(c, &conn->chan_l, list) { - if (c->scid == cid) - return c; + rcu_read_lock(); + + list_for_each_entry_rcu(c, &conn->chan_l, list) { + if (c->scid == cid) { + r = c; + break; + } } - return NULL; + + rcu_read_unlock(); + return r; } /* Find channel with given SCID. @@ -116,34 +117,36 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 ci { struct l2cap_chan *c; - read_lock(&conn->chan_lock); c = __l2cap_get_chan_by_scid(conn, cid); if (c) - bh_lock_sock(c->sk); - read_unlock(&conn->chan_lock); + lock_sock(c->sk); return c; } static struct l2cap_chan *__l2cap_get_chan_by_ident(struct l2cap_conn *conn, u8 ident) { - struct l2cap_chan *c; + struct l2cap_chan *c, *r = NULL; - list_for_each_entry(c, &conn->chan_l, list) { - if (c->ident == ident) - return c; + rcu_read_lock(); + + list_for_each_entry_rcu(c, &conn->chan_l, list) { + if (c->ident == ident) { + r = c; + break; + } } - return NULL; + + rcu_read_unlock(); + return r; } static inline struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn, u8 ident) { struct l2cap_chan *c; - read_lock(&conn->chan_lock); c = __l2cap_get_chan_by_ident(conn, ident); if (c) - bh_lock_sock(c->sk); - read_unlock(&conn->chan_lock); + lock_sock(c->sk); return c; } @@ -153,19 +156,16 @@ static struct l2cap_chan *__l2cap_global_chan_by_addr(__le16 psm, bdaddr_t *src) list_for_each_entry(c, &chan_list, global_l) { if (c->sport == psm && !bacmp(&bt_sk(c->sk)->src, src)) - goto found; + return c; } - - c = NULL; -found: - return c; + return NULL; } int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm) { int err; - write_lock_bh(&chan_list_lock); + write_lock(&chan_list_lock); if (psm && __l2cap_global_chan_by_addr(psm, src)) { err = -EADDRINUSE; @@ -190,17 +190,17 @@ int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm) } done: - write_unlock_bh(&chan_list_lock); + write_unlock(&chan_list_lock); return err; } int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid) { - write_lock_bh(&chan_list_lock); + write_lock(&chan_list_lock); chan->scid = scid; - write_unlock_bh(&chan_list_lock); + write_unlock(&chan_list_lock); return 0; } @@ -217,45 +217,51 @@ static u16 l2cap_alloc_cid(struct l2cap_conn *conn) return 0; } -static void l2cap_set_timer(struct l2cap_chan *chan, struct timer_list *timer, long timeout) -{ - BT_DBG("chan %p state %d timeout %ld", chan->sk, chan->state, timeout); - - if (!mod_timer(timer, jiffies + msecs_to_jiffies(timeout))) - chan_hold(chan); -} - -static void l2cap_clear_timer(struct l2cap_chan *chan, struct timer_list *timer) +static char *state_to_string(int state) { - BT_DBG("chan %p state %d", chan, chan->state); + switch(state) { + case BT_CONNECTED: + return "BT_CONNECTED"; + case BT_OPEN: + return "BT_OPEN"; + case BT_BOUND: + return "BT_BOUND"; + case BT_LISTEN: + return "BT_LISTEN"; + case BT_CONNECT: + return "BT_CONNECT"; + case BT_CONNECT2: + return "BT_CONNECT2"; + case BT_CONFIG: + return "BT_CONFIG"; + case BT_DISCONN: + return "BT_DISCONN"; + case BT_CLOSED: + return "BT_CLOSED"; + } - if (timer_pending(timer) && del_timer(timer)) - chan_put(chan); + return "invalid state"; } static void l2cap_state_change(struct l2cap_chan *chan, int state) { + BT_DBG("%p %s -> %s", chan, state_to_string(chan->state), + state_to_string(state)); + chan->state = state; chan->ops->state_change(chan->data, state); } -static void l2cap_chan_timeout(unsigned long arg) +static void l2cap_chan_timeout(struct work_struct *work) { - struct l2cap_chan *chan = (struct l2cap_chan *) arg; + struct l2cap_chan *chan = container_of(work, struct l2cap_chan, + chan_timer.work); struct sock *sk = chan->sk; int reason; BT_DBG("chan %p state %d", chan, chan->state); - bh_lock_sock(sk); - - if (sock_owned_by_user(sk)) { - /* sk is owned by user. Try again later */ - __set_chan_timer(chan, HZ / 5); - bh_unlock_sock(sk); - chan_put(chan); - return; - } + lock_sock(sk); if (chan->state == BT_CONNECTED || chan->state == BT_CONFIG) reason = ECONNREFUSED; @@ -267,10 +273,10 @@ static void l2cap_chan_timeout(unsigned long arg) l2cap_chan_close(chan, reason); - bh_unlock_sock(sk); + release_sock(sk); chan->ops->close(chan->data); - chan_put(chan); + l2cap_chan_put(chan); } struct l2cap_chan *l2cap_chan_create(struct sock *sk) @@ -283,34 +289,36 @@ struct l2cap_chan *l2cap_chan_create(struct sock *sk) chan->sk = sk; - write_lock_bh(&chan_list_lock); + write_lock(&chan_list_lock); list_add(&chan->global_l, &chan_list); - write_unlock_bh(&chan_list_lock); + write_unlock(&chan_list_lock); - setup_timer(&chan->chan_timer, l2cap_chan_timeout, (unsigned long) chan); + INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout); chan->state = BT_OPEN; atomic_set(&chan->refcnt, 1); + BT_DBG("sk %p chan %p", sk, chan); + return chan; } void l2cap_chan_destroy(struct l2cap_chan *chan) { - write_lock_bh(&chan_list_lock); + write_lock(&chan_list_lock); list_del(&chan->global_l); - write_unlock_bh(&chan_list_lock); + write_unlock(&chan_list_lock); - chan_put(chan); + l2cap_chan_put(chan); } -static void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) +static void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, chan->psm, chan->dcid); - conn->disc_reason = 0x13; + conn->disc_reason = HCI_ERROR_REMOTE_USER_TERM; chan->conn = conn; @@ -337,9 +345,16 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) chan->omtu = L2CAP_DEFAULT_MTU; } - chan_hold(chan); + chan->local_id = L2CAP_BESTEFFORT_ID; + chan->local_stype = L2CAP_SERV_BESTEFFORT; + chan->local_msdu = L2CAP_DEFAULT_MAX_SDU_SIZE; + chan->local_sdu_itime = L2CAP_DEFAULT_SDU_ITIME; + chan->local_acc_lat = L2CAP_DEFAULT_ACC_LAT; + chan->local_flush_to = L2CAP_DEFAULT_FLUSH_TO; + + l2cap_chan_hold(chan); - list_add(&chan->list, &conn->chan_l); + list_add_rcu(&chan->list, &conn->chan_l); } /* Delete channel. @@ -356,10 +371,10 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) if (conn) { /* Delete from channel list */ - write_lock_bh(&conn->chan_lock); - list_del(&chan->list); - write_unlock_bh(&conn->chan_lock); - chan_put(chan); + list_del_rcu(&chan->list); + synchronize_rcu(); + + l2cap_chan_put(chan); chan->conn = NULL; hci_conn_put(conn->hcon); @@ -508,7 +523,7 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan) } /* Service level security */ -static inline int l2cap_check_security(struct l2cap_chan *chan) +int l2cap_chan_check_security(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; __u8 auth_type; @@ -528,14 +543,14 @@ static u8 l2cap_get_ident(struct l2cap_conn *conn) * 200 - 254 are used by utilities like l2ping, etc. */ - spin_lock_bh(&conn->lock); + spin_lock(&conn->lock); if (++conn->tx_ident > 128) conn->tx_ident = 1; id = conn->tx_ident; - spin_unlock_bh(&conn->lock); + spin_unlock(&conn->lock); return id; } @@ -556,34 +571,58 @@ static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, flags = ACL_START; bt_cb(skb)->force_active = BT_POWER_FORCE_ACTIVE_ON; + skb->priority = HCI_PRIO_MAX; + + hci_send_acl(conn->hchan, skb, flags); +} + +static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb) +{ + struct hci_conn *hcon = chan->conn->hcon; + u16 flags; - hci_send_acl(conn->hcon, skb, flags); + BT_DBG("chan %p, skb %p len %d priority %u", chan, skb, skb->len, + skb->priority); + + if (!test_bit(FLAG_FLUSHABLE, &chan->flags) && + lmp_no_flush_capable(hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else + flags = ACL_START; + + bt_cb(skb)->force_active = test_bit(FLAG_FORCE_ACTIVE, &chan->flags); + hci_send_acl(chan->conn->hchan, skb, flags); } -static inline void l2cap_send_sframe(struct l2cap_chan *chan, u16 control) +static inline void l2cap_send_sframe(struct l2cap_chan *chan, u32 control) { struct sk_buff *skb; struct l2cap_hdr *lh; struct l2cap_conn *conn = chan->conn; - int count, hlen = L2CAP_HDR_SIZE + 2; - u8 flags; + int count, hlen; if (chan->state != BT_CONNECTED) return; + if (test_bit(FLAG_EXT_CTRL, &chan->flags)) + hlen = L2CAP_EXT_HDR_SIZE; + else + hlen = L2CAP_ENH_HDR_SIZE; + if (chan->fcs == L2CAP_FCS_CRC16) - hlen += 2; + hlen += L2CAP_FCS_SIZE; - BT_DBG("chan %p, control 0x%2.2x", chan, control); + BT_DBG("chan %p, control 0x%8.8x", chan, control); count = min_t(unsigned int, conn->mtu, hlen); - control |= L2CAP_CTRL_FRAME_TYPE; + + control |= __set_sframe(chan); if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) - control |= L2CAP_CTRL_FINAL; + control |= __set_ctrl_final(chan); if (test_and_clear_bit(CONN_SEND_PBIT, &chan->conn_state)) - control |= L2CAP_CTRL_POLL; + control |= __set_ctrl_poll(chan); skb = bt_skb_alloc(count, GFP_ATOMIC); if (!skb) @@ -592,32 +631,27 @@ static inline void l2cap_send_sframe(struct l2cap_chan *chan, u16 control) lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); - put_unaligned_le16(control, skb_put(skb, 2)); + + __put_control(chan, control, skb_put(skb, __ctrl_size(chan))); if (chan->fcs == L2CAP_FCS_CRC16) { - u16 fcs = crc16(0, (u8 *)lh, count - 2); - put_unaligned_le16(fcs, skb_put(skb, 2)); + u16 fcs = crc16(0, (u8 *)lh, count - L2CAP_FCS_SIZE); + put_unaligned_le16(fcs, skb_put(skb, L2CAP_FCS_SIZE)); } - if (lmp_no_flush_capable(conn->hcon->hdev)) - flags = ACL_START_NO_FLUSH; - else - flags = ACL_START; - - bt_cb(skb)->force_active = chan->force_active; - - hci_send_acl(chan->conn->hcon, skb, flags); + skb->priority = HCI_PRIO_MAX; + l2cap_do_send(chan, skb); } -static inline void l2cap_send_rr_or_rnr(struct l2cap_chan *chan, u16 control) +static inline void l2cap_send_rr_or_rnr(struct l2cap_chan *chan, u32 control) { if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { - control |= L2CAP_SUPER_RCV_NOT_READY; + control |= __set_ctrl_super(chan, L2CAP_SUPER_RNR); set_bit(CONN_RNR_SENT, &chan->conn_state); } else - control |= L2CAP_SUPER_RCV_READY; + control |= __set_ctrl_super(chan, L2CAP_SUPER_RR); - control |= chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control |= __set_reqseq(chan, chan->buffer_seq); l2cap_send_sframe(chan, control); } @@ -635,7 +669,7 @@ static void l2cap_do_start(struct l2cap_chan *chan) if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) return; - if (l2cap_check_security(chan) && + if (l2cap_chan_check_security(chan) && __l2cap_no_conn_pending(chan)) { struct l2cap_conn_req req; req.scid = cpu_to_le16(chan->scid); @@ -654,7 +688,7 @@ static void l2cap_do_start(struct l2cap_chan *chan) conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; conn->info_ident = l2cap_get_ident(conn); - mod_timer(&conn->info_timer, jiffies + + schedule_delayed_work(&conn->info_timer, msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); l2cap_send_cmd(conn, conn->info_ident, @@ -706,13 +740,13 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *c /* ---- L2CAP connections ---- */ static void l2cap_conn_start(struct l2cap_conn *conn) { - struct l2cap_chan *chan, *tmp; + struct l2cap_chan *chan; BT_DBG("conn %p", conn); - read_lock(&conn->chan_lock); + rcu_read_lock(); - list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { + list_for_each_entry_rcu(chan, &conn->chan_l, list) { struct sock *sk = chan->sk; bh_lock_sock(sk); @@ -725,7 +759,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) if (chan->state == BT_CONNECT) { struct l2cap_conn_req req; - if (!l2cap_check_security(chan) || + if (!l2cap_chan_check_security(chan) || !__l2cap_no_conn_pending(chan)) { bh_unlock_sock(sk); continue; @@ -736,9 +770,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) &chan->conf_state)) { /* l2cap_chan_close() calls list_del(chan) * so release the lock */ - read_unlock(&conn->chan_lock); l2cap_chan_close(chan, ECONNRESET); - read_lock(&conn->chan_lock); bh_unlock_sock(sk); continue; } @@ -758,7 +790,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); - if (l2cap_check_security(chan)) { + if (l2cap_chan_check_security(chan)) { if (bt_sk(sk)->defer_setup) { struct sock *parent = bt_sk(sk)->parent; rsp.result = cpu_to_le16(L2CAP_CR_PEND); @@ -794,7 +826,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) bh_unlock_sock(sk); } - read_unlock(&conn->chan_lock); + rcu_read_unlock(); } /* Find socket with cid and source bdaddr. @@ -845,7 +877,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) parent = pchan->sk; - bh_lock_sock(parent); + lock_sock(parent); /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { @@ -859,8 +891,6 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) sk = chan->sk; - write_lock_bh(&conn->chan_lock); - hci_conn_hold(conn->hcon); bacpy(&bt_sk(sk)->src, conn->src); @@ -868,17 +898,15 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) bt_accept_enqueue(parent, sk); - __l2cap_chan_add(conn, chan); + l2cap_chan_add(conn, chan); __set_chan_timer(chan, sk->sk_sndtimeo); l2cap_state_change(chan, BT_CONNECTED); parent->sk_data_ready(parent, 0); - write_unlock_bh(&conn->chan_lock); - clean: - bh_unlock_sock(parent); + release_sock(parent); } static void l2cap_chan_ready(struct sock *sk) @@ -907,9 +935,12 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) if (!conn->hcon->out && conn->hcon->type == LE_LINK) l2cap_le_conn_ready(conn); - read_lock(&conn->chan_lock); + if (conn->hcon->out && conn->hcon->type == LE_LINK) + smp_conn_security(conn, conn->hcon->pending_sec_level); + + rcu_read_lock(); - list_for_each_entry(chan, &conn->chan_l, list) { + list_for_each_entry_rcu(chan, &conn->chan_l, list) { struct sock *sk = chan->sk; bh_lock_sock(sk); @@ -929,7 +960,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) bh_unlock_sock(sk); } - read_unlock(&conn->chan_lock); + rcu_read_unlock(); } /* Notify sockets that we cannot guaranty reliability anymore */ @@ -939,21 +970,22 @@ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) BT_DBG("conn %p", conn); - read_lock(&conn->chan_lock); + rcu_read_lock(); - list_for_each_entry(chan, &conn->chan_l, list) { + list_for_each_entry_rcu(chan, &conn->chan_l, list) { struct sock *sk = chan->sk; - if (chan->force_reliable) + if (test_bit(FLAG_FORCE_RELIABLE, &chan->flags)) sk->sk_err = err; } - read_unlock(&conn->chan_lock); + rcu_read_unlock(); } -static void l2cap_info_timeout(unsigned long arg) +static void l2cap_info_timeout(struct work_struct *work) { - struct l2cap_conn *conn = (void *) arg; + struct l2cap_conn *conn = container_of(work, struct l2cap_conn, + info_timer.work); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; @@ -977,25 +1009,30 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) /* Kill channels */ list_for_each_entry_safe(chan, l, &conn->chan_l, list) { sk = chan->sk; - bh_lock_sock(sk); + lock_sock(sk); l2cap_chan_del(chan, err); - bh_unlock_sock(sk); + release_sock(sk); chan->ops->close(chan->data); } + hci_chan_del(conn->hchan); + if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) - del_timer_sync(&conn->info_timer); + __cancel_delayed_work(&conn->info_timer); - if (test_bit(HCI_CONN_ENCRYPT_PEND, &hcon->pend)) - del_timer(&conn->security_timer); + if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &hcon->pend)) { + __cancel_delayed_work(&conn->security_timer); + smp_chan_destroy(conn); + } hcon->l2cap_data = NULL; kfree(conn); } -static void security_timeout(unsigned long arg) +static void security_timeout(struct work_struct *work) { - struct l2cap_conn *conn = (void *) arg; + struct l2cap_conn *conn = container_of(work, struct l2cap_conn, + security_timer.work); l2cap_conn_del(conn->hcon, ETIMEDOUT); } @@ -1003,18 +1040,26 @@ static void security_timeout(unsigned long arg) static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) { struct l2cap_conn *conn = hcon->l2cap_data; + struct hci_chan *hchan; if (conn || status) return conn; + hchan = hci_chan_create(hcon); + if (!hchan) + return NULL; + conn = kzalloc(sizeof(struct l2cap_conn), GFP_ATOMIC); - if (!conn) + if (!conn) { + hci_chan_del(hchan); return NULL; + } hcon->l2cap_data = conn; conn->hcon = hcon; + conn->hchan = hchan; - BT_DBG("hcon %p conn %p", hcon, conn); + BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan); if (hcon->hdev->le_mtu && hcon->type == LE_LINK) conn->mtu = hcon->hdev->le_mtu; @@ -1027,29 +1072,19 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) conn->feat_mask = 0; spin_lock_init(&conn->lock); - rwlock_init(&conn->chan_lock); INIT_LIST_HEAD(&conn->chan_l); if (hcon->type == LE_LINK) - setup_timer(&conn->security_timer, security_timeout, - (unsigned long) conn); + INIT_DELAYED_WORK(&conn->security_timer, security_timeout); else - setup_timer(&conn->info_timer, l2cap_info_timeout, - (unsigned long) conn); + INIT_DELAYED_WORK(&conn->info_timer, l2cap_info_timeout); - conn->disc_reason = 0x13; + conn->disc_reason = HCI_ERROR_REMOTE_USER_TERM; return conn; } -static inline void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) -{ - write_lock_bh(&conn->chan_lock); - __l2cap_chan_add(conn, chan); - write_unlock_bh(&conn->chan_lock); -} - /* ---- Socket interface ---- */ /* Find socket with psm and source bdaddr. @@ -1085,11 +1120,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr return c1; } -int l2cap_chan_connect(struct l2cap_chan *chan) +inline int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, bdaddr_t *dst) { struct sock *sk = chan->sk; bdaddr_t *src = &bt_sk(sk)->src; - bdaddr_t *dst = &bt_sk(sk)->dst; struct l2cap_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; @@ -1103,7 +1137,62 @@ int l2cap_chan_connect(struct l2cap_chan *chan) if (!hdev) return -EHOSTUNREACH; - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); + + lock_sock(sk); + + /* PSM must be odd and lsb of upper byte must be 0 */ + if ((__le16_to_cpu(psm) & 0x0101) != 0x0001 && !cid && + chan->chan_type != L2CAP_CHAN_RAW) { + err = -EINVAL; + goto done; + } + + if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED && !(psm || cid)) { + err = -EINVAL; + goto done; + } + + switch (chan->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; + /* fall through */ + default: + err = -ENOTSUPP; + goto done; + } + + switch (sk->sk_state) { + case BT_CONNECT: + case BT_CONNECT2: + case BT_CONFIG: + /* Already connecting */ + err = 0; + goto done; + + case BT_CONNECTED: + /* Already connected */ + err = -EISCONN; + goto done; + + case BT_OPEN: + case BT_BOUND: + /* Can connect */ + break; + + default: + err = -EBADFD; + goto done; + } + + /* Set destination address and psm */ + bacpy(&bt_sk(sk)->dst, dst); + chan->psm = psm; + chan->dcid = cid; auth_type = l2cap_get_auth_type(chan); @@ -1137,7 +1226,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan) if (hcon->state == BT_CONNECTED) { if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { __clear_chan_timer(chan); - if (l2cap_check_security(chan)) + if (l2cap_chan_check_security(chan)) l2cap_state_change(chan, BT_CONNECTED); } else l2cap_do_start(chan); @@ -1146,7 +1235,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan) err = 0; done: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } @@ -1183,17 +1272,18 @@ int __l2cap_wait_ack(struct sock *sk) return err; } -static void l2cap_monitor_timeout(unsigned long arg) +static void l2cap_monitor_timeout(struct work_struct *work) { - struct l2cap_chan *chan = (void *) arg; + struct l2cap_chan *chan = container_of(work, struct l2cap_chan, + monitor_timer.work); struct sock *sk = chan->sk; BT_DBG("chan %p", chan); - bh_lock_sock(sk); + lock_sock(sk); if (chan->retry_count >= chan->remote_max_tx) { l2cap_send_disconn_req(chan->conn, chan, ECONNABORTED); - bh_unlock_sock(sk); + release_sock(sk); return; } @@ -1201,24 +1291,25 @@ static void l2cap_monitor_timeout(unsigned long arg) __set_monitor_timer(chan); l2cap_send_rr_or_rnr(chan, L2CAP_CTRL_POLL); - bh_unlock_sock(sk); + release_sock(sk); } -static void l2cap_retrans_timeout(unsigned long arg) +static void l2cap_retrans_timeout(struct work_struct *work) { - struct l2cap_chan *chan = (void *) arg; + struct l2cap_chan *chan = container_of(work, struct l2cap_chan, + retrans_timer.work); struct sock *sk = chan->sk; BT_DBG("chan %p", chan); - bh_lock_sock(sk); + lock_sock(sk); chan->retry_count = 1; __set_monitor_timer(chan); set_bit(CONN_WAIT_F, &chan->conn_state); l2cap_send_rr_or_rnr(chan, L2CAP_CTRL_POLL); - bh_unlock_sock(sk); + release_sock(sk); } static void l2cap_drop_acked_frames(struct l2cap_chan *chan) @@ -1240,60 +1331,46 @@ static void l2cap_drop_acked_frames(struct l2cap_chan *chan) __clear_retrans_timer(chan); } -void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb) -{ - struct hci_conn *hcon = chan->conn->hcon; - u16 flags; - - BT_DBG("chan %p, skb %p len %d", chan, skb, skb->len); - - if (!chan->flushable && lmp_no_flush_capable(hcon->hdev)) - flags = ACL_START_NO_FLUSH; - else - flags = ACL_START; - - bt_cb(skb)->force_active = chan->force_active; - hci_send_acl(hcon, skb, flags); -} - -void l2cap_streaming_send(struct l2cap_chan *chan) +static void l2cap_streaming_send(struct l2cap_chan *chan) { struct sk_buff *skb; - u16 control, fcs; + u32 control; + u16 fcs; while ((skb = skb_dequeue(&chan->tx_q))) { - control = get_unaligned_le16(skb->data + L2CAP_HDR_SIZE); - control |= chan->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT; - put_unaligned_le16(control, skb->data + L2CAP_HDR_SIZE); + control = __get_control(chan, skb->data + L2CAP_HDR_SIZE); + control |= __set_txseq(chan, chan->next_tx_seq); + __put_control(chan, control, skb->data + L2CAP_HDR_SIZE); if (chan->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)skb->data, skb->len - 2); - put_unaligned_le16(fcs, skb->data + skb->len - 2); + fcs = crc16(0, (u8 *)skb->data, + skb->len - L2CAP_FCS_SIZE); + put_unaligned_le16(fcs, + skb->data + skb->len - L2CAP_FCS_SIZE); } l2cap_do_send(chan, skb); - chan->next_tx_seq = (chan->next_tx_seq + 1) % 64; + chan->next_tx_seq = __next_seq(chan, chan->next_tx_seq); } } -static void l2cap_retransmit_one_frame(struct l2cap_chan *chan, u8 tx_seq) +static void l2cap_retransmit_one_frame(struct l2cap_chan *chan, u16 tx_seq) { struct sk_buff *skb, *tx_skb; - u16 control, fcs; + u16 fcs; + u32 control; skb = skb_peek(&chan->tx_q); if (!skb) return; - do { - if (bt_cb(skb)->tx_seq == tx_seq) - break; - + while (bt_cb(skb)->tx_seq != tx_seq) { if (skb_queue_is_last(&chan->tx_q, skb)) return; - } while ((skb = skb_queue_next(&chan->tx_q, skb))); + skb = skb_queue_next(&chan->tx_q, skb); + } if (chan->remote_max_tx && bt_cb(skb)->retries == chan->remote_max_tx) { @@ -1303,29 +1380,33 @@ static void l2cap_retransmit_one_frame(struct l2cap_chan *chan, u8 tx_seq) tx_skb = skb_clone(skb, GFP_ATOMIC); bt_cb(skb)->retries++; - control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); - control &= L2CAP_CTRL_SAR; + + control = __get_control(chan, tx_skb->data + L2CAP_HDR_SIZE); + control &= __get_sar_mask(chan); if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) - control |= L2CAP_CTRL_FINAL; + control |= __set_ctrl_final(chan); - control |= (chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) - | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); + control |= __set_reqseq(chan, chan->buffer_seq); + control |= __set_txseq(chan, tx_seq); - put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + __put_control(chan, control, tx_skb->data + L2CAP_HDR_SIZE); if (chan->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); - put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); + fcs = crc16(0, (u8 *)tx_skb->data, + tx_skb->len - L2CAP_FCS_SIZE); + put_unaligned_le16(fcs, + tx_skb->data + tx_skb->len - L2CAP_FCS_SIZE); } l2cap_do_send(chan, tx_skb); } -int l2cap_ertm_send(struct l2cap_chan *chan) +static int l2cap_ertm_send(struct l2cap_chan *chan) { struct sk_buff *skb, *tx_skb; - u16 control, fcs; + u16 fcs; + u32 control; int nsent = 0; if (chan->state != BT_CONNECTED) @@ -1343,20 +1424,22 @@ int l2cap_ertm_send(struct l2cap_chan *chan) bt_cb(skb)->retries++; - control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); - control &= L2CAP_CTRL_SAR; + control = __get_control(chan, tx_skb->data + L2CAP_HDR_SIZE); + control &= __get_sar_mask(chan); if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) - control |= L2CAP_CTRL_FINAL; + control |= __set_ctrl_final(chan); - control |= (chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) - | (chan->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); - put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + control |= __set_reqseq(chan, chan->buffer_seq); + control |= __set_txseq(chan, chan->next_tx_seq); + __put_control(chan, control, tx_skb->data + L2CAP_HDR_SIZE); if (chan->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)skb->data, tx_skb->len - 2); - put_unaligned_le16(fcs, skb->data + tx_skb->len - 2); + fcs = crc16(0, (u8 *)skb->data, + tx_skb->len - L2CAP_FCS_SIZE); + put_unaligned_le16(fcs, skb->data + + tx_skb->len - L2CAP_FCS_SIZE); } l2cap_do_send(chan, tx_skb); @@ -1364,7 +1447,8 @@ int l2cap_ertm_send(struct l2cap_chan *chan) __set_retrans_timer(chan); bt_cb(skb)->tx_seq = chan->next_tx_seq; - chan->next_tx_seq = (chan->next_tx_seq + 1) % 64; + + chan->next_tx_seq = __next_seq(chan, chan->next_tx_seq); if (bt_cb(skb)->retries == 1) chan->unacked_frames++; @@ -1396,12 +1480,12 @@ static int l2cap_retransmit_frames(struct l2cap_chan *chan) static void l2cap_send_ack(struct l2cap_chan *chan) { - u16 control = 0; + u32 control = 0; - control |= chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control |= __set_reqseq(chan, chan->buffer_seq); if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { - control |= L2CAP_SUPER_RCV_NOT_READY; + control |= __set_ctrl_super(chan, L2CAP_SUPER_RNR); set_bit(CONN_RNR_SENT, &chan->conn_state); l2cap_send_sframe(chan, control); return; @@ -1410,20 +1494,20 @@ static void l2cap_send_ack(struct l2cap_chan *chan) if (l2cap_ertm_send(chan) > 0) return; - control |= L2CAP_SUPER_RCV_READY; + control |= __set_ctrl_super(chan, L2CAP_SUPER_RR); l2cap_send_sframe(chan, control); } static void l2cap_send_srejtail(struct l2cap_chan *chan) { struct srej_list *tail; - u16 control; + u32 control; - control = L2CAP_SUPER_SELECT_REJECT; - control |= L2CAP_CTRL_FINAL; + control = __set_ctrl_super(chan, L2CAP_SUPER_SREJ); + control |= __set_ctrl_final(chan); tail = list_entry((&chan->srej_l)->prev, struct srej_list, list); - control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control |= __set_reqseq(chan, tail->tx_seq); l2cap_send_sframe(chan, control); } @@ -1451,6 +1535,8 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count)) return -EFAULT; + (*frag)->priority = skb->priority; + sent += count; len -= count; @@ -1460,15 +1546,17 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in return sent; } -struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len) +static struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, + struct msghdr *msg, size_t len, + u32 priority) { struct sock *sk = chan->sk; struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; - int err, count, hlen = L2CAP_HDR_SIZE + 2; + int err, count, hlen = L2CAP_HDR_SIZE + L2CAP_PSMLEN_SIZE; struct l2cap_hdr *lh; - BT_DBG("sk %p len %d", sk, (int)len); + BT_DBG("sk %p len %d priority %u", sk, (int)len, priority); count = min_t(unsigned int, (conn->mtu - hlen), len); skb = bt_skb_send_alloc(sk, count + hlen, @@ -1476,6 +1564,8 @@ struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, struct msghdr if (!skb) return ERR_PTR(err); + skb->priority = priority; + /* Create L2CAP header */ lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); @@ -1490,7 +1580,9 @@ struct sk_buff *l2cap_create_connless_pdu(struct l2cap_chan *chan, struct msghdr return skb; } -struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len) +static struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, + struct msghdr *msg, size_t len, + u32 priority) { struct sock *sk = chan->sk; struct l2cap_conn *conn = chan->conn; @@ -1506,6 +1598,8 @@ struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, struct msghdr *m if (!skb) return ERR_PTR(err); + skb->priority = priority; + /* Create L2CAP header */ lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); @@ -1519,12 +1613,14 @@ struct sk_buff *l2cap_create_basic_pdu(struct l2cap_chan *chan, struct msghdr *m return skb; } -struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len, u16 control, u16 sdulen) +static struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, + struct msghdr *msg, size_t len, + u32 control, u16 sdulen) { struct sock *sk = chan->sk; struct l2cap_conn *conn = chan->conn; struct sk_buff *skb; - int err, count, hlen = L2CAP_HDR_SIZE + 2; + int err, count, hlen; struct l2cap_hdr *lh; BT_DBG("sk %p len %d", sk, (int)len); @@ -1532,11 +1628,16 @@ struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, struct msghdr * if (!conn) return ERR_PTR(-ENOTCONN); + if (test_bit(FLAG_EXT_CTRL, &chan->flags)) + hlen = L2CAP_EXT_HDR_SIZE; + else + hlen = L2CAP_ENH_HDR_SIZE; + if (sdulen) - hlen += 2; + hlen += L2CAP_SDULEN_SIZE; if (chan->fcs == L2CAP_FCS_CRC16) - hlen += 2; + hlen += L2CAP_FCS_SIZE; count = min_t(unsigned int, (conn->mtu - hlen), len); skb = bt_skb_send_alloc(sk, count + hlen, @@ -1548,9 +1649,11 @@ struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, struct msghdr * lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(chan->dcid); lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); - put_unaligned_le16(control, skb_put(skb, 2)); + + __put_control(chan, control, skb_put(skb, __ctrl_size(chan))); + if (sdulen) - put_unaligned_le16(sdulen, skb_put(skb, 2)); + put_unaligned_le16(sdulen, skb_put(skb, L2CAP_SDULEN_SIZE)); err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); if (unlikely(err < 0)) { @@ -1559,21 +1662,21 @@ struct sk_buff *l2cap_create_iframe_pdu(struct l2cap_chan *chan, struct msghdr * } if (chan->fcs == L2CAP_FCS_CRC16) - put_unaligned_le16(0, skb_put(skb, 2)); + put_unaligned_le16(0, skb_put(skb, L2CAP_FCS_SIZE)); bt_cb(skb)->retries = 0; return skb; } -int l2cap_sar_segment_sdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len) +static int l2cap_sar_segment_sdu(struct l2cap_chan *chan, struct msghdr *msg, size_t len) { struct sk_buff *skb; struct sk_buff_head sar_queue; - u16 control; + u32 control; size_t size = 0; skb_queue_head_init(&sar_queue); - control = L2CAP_SDU_START; + control = __set_ctrl_sar(chan, L2CAP_SAR_START); skb = l2cap_create_iframe_pdu(chan, msg, chan->remote_mps, control, len); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -1586,10 +1689,10 @@ int l2cap_sar_segment_sdu(struct l2cap_chan *chan, struct msghdr *msg, size_t le size_t buflen; if (len > chan->remote_mps) { - control = L2CAP_SDU_CONTINUE; + control = __set_ctrl_sar(chan, L2CAP_SAR_CONTINUE); buflen = chan->remote_mps; } else { - control = L2CAP_SDU_END; + control = __set_ctrl_sar(chan, L2CAP_SAR_END); buflen = len; } @@ -1610,15 +1713,16 @@ int l2cap_sar_segment_sdu(struct l2cap_chan *chan, struct msghdr *msg, size_t le return size; } -int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) +int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, + u32 priority) { struct sk_buff *skb; - u16 control; + u32 control; int err; /* Connectionless channel */ if (chan->chan_type == L2CAP_CHAN_CONN_LESS) { - skb = l2cap_create_connless_pdu(chan, msg, len); + skb = l2cap_create_connless_pdu(chan, msg, len, priority); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -1633,7 +1737,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) return -EMSGSIZE; /* Create a basic PDU */ - skb = l2cap_create_basic_pdu(chan, msg, len); + skb = l2cap_create_basic_pdu(chan, msg, len, priority); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -1645,7 +1749,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) case L2CAP_MODE_STREAMING: /* Entire SDU fits into one PDU */ if (len <= chan->remote_mps) { - control = L2CAP_SDU_UNSEGMENTED; + control = __set_ctrl_sar(chan, L2CAP_SAR_UNSEGMENTED); skb = l2cap_create_iframe_pdu(chan, msg, len, control, 0); if (IS_ERR(skb)) @@ -1697,8 +1801,9 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); - read_lock(&conn->chan_lock); - list_for_each_entry(chan, &conn->chan_l, list) { + rcu_read_lock(); + + list_for_each_entry_rcu(chan, &conn->chan_l, list) { struct sock *sk = chan->sk; if (chan->chan_type != L2CAP_CHAN_RAW) continue; @@ -1713,7 +1818,8 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) if (chan->ops->recv(chan->data, nskb)) kfree_skb(nskb); } - read_unlock(&conn->chan_lock); + + rcu_read_unlock(); } /* ---- L2CAP signalling commands ---- */ @@ -1843,37 +1949,64 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) *ptr += L2CAP_CONF_OPT_SIZE + len; } -static void l2cap_ack_timeout(unsigned long arg) +static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan) +{ + struct l2cap_conf_efs efs; + + switch (chan->mode) { + case L2CAP_MODE_ERTM: + efs.id = chan->local_id; + efs.stype = chan->local_stype; + efs.msdu = cpu_to_le16(chan->local_msdu); + efs.sdu_itime = cpu_to_le32(chan->local_sdu_itime); + efs.acc_lat = cpu_to_le32(L2CAP_DEFAULT_ACC_LAT); + efs.flush_to = cpu_to_le32(L2CAP_DEFAULT_FLUSH_TO); + break; + + case L2CAP_MODE_STREAMING: + efs.id = 1; + efs.stype = L2CAP_SERV_BESTEFFORT; + efs.msdu = cpu_to_le16(chan->local_msdu); + efs.sdu_itime = cpu_to_le32(chan->local_sdu_itime); + efs.acc_lat = 0; + efs.flush_to = 0; + break; + + default: + return; + } + + l2cap_add_conf_opt(ptr, L2CAP_CONF_EFS, sizeof(efs), + (unsigned long) &efs); +} + +static void l2cap_ack_timeout(struct work_struct *work) { - struct l2cap_chan *chan = (void *) arg; + struct l2cap_chan *chan = container_of(work, struct l2cap_chan, + ack_timer.work); - bh_lock_sock(chan->sk); + BT_DBG("chan %p", chan); + + lock_sock(chan->sk); l2cap_send_ack(chan); - bh_unlock_sock(chan->sk); + release_sock(chan->sk); } static inline void l2cap_ertm_init(struct l2cap_chan *chan) { - struct sock *sk = chan->sk; - chan->expected_ack_seq = 0; chan->unacked_frames = 0; chan->buffer_seq = 0; chan->num_acked = 0; chan->frames_sent = 0; - setup_timer(&chan->retrans_timer, l2cap_retrans_timeout, - (unsigned long) chan); - setup_timer(&chan->monitor_timer, l2cap_monitor_timeout, - (unsigned long) chan); - setup_timer(&chan->ack_timer, l2cap_ack_timeout, (unsigned long) chan); + INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout); + INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout); + INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout); skb_queue_head_init(&chan->srej_q); INIT_LIST_HEAD(&chan->srej_l); - - - sk->sk_backlog_rcv = l2cap_ertm_data_rcv; } static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) @@ -1889,11 +2022,36 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) } } +static inline bool __l2cap_ews_supported(struct l2cap_chan *chan) +{ + return enable_hs && chan->conn->feat_mask & L2CAP_FEAT_EXT_WINDOW; +} + +static inline bool __l2cap_efs_supported(struct l2cap_chan *chan) +{ + return enable_hs && chan->conn->feat_mask & L2CAP_FEAT_EXT_FLOW; +} + +static inline void l2cap_txwin_setup(struct l2cap_chan *chan) +{ + if (chan->tx_win > L2CAP_DEFAULT_TX_WINDOW && + __l2cap_ews_supported(chan)) { + /* use extended control field */ + set_bit(FLAG_EXT_CTRL, &chan->flags); + chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; + } else { + chan->tx_win = min_t(u16, chan->tx_win, + L2CAP_DEFAULT_TX_WINDOW); + chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; + } +} + static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) { struct l2cap_conf_req *req = data; struct l2cap_conf_rfc rfc = { .mode = chan->mode }; void *ptr = req->data; + u16 size; BT_DBG("chan %p", chan); @@ -1906,6 +2064,9 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state)) break; + if (__l2cap_efs_supported(chan)) + set_bit(FLAG_EFS_ENABLE, &chan->flags); + /* fall through */ default: chan->mode = l2cap_select_mode(rfc.mode, chan->conn->feat_mask); @@ -1935,17 +2096,27 @@ done: case L2CAP_MODE_ERTM: rfc.mode = L2CAP_MODE_ERTM; - rfc.txwin_size = chan->tx_win; rfc.max_transmit = chan->max_tx; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; - rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); - if (L2CAP_DEFAULT_MAX_PDU_SIZE > chan->conn->mtu - 10) - rfc.max_pdu_size = cpu_to_le16(chan->conn->mtu - 10); + + size = min_t(u16, L2CAP_DEFAULT_MAX_PDU_SIZE, chan->conn->mtu - + L2CAP_EXT_HDR_SIZE - + L2CAP_SDULEN_SIZE - + L2CAP_FCS_SIZE); + rfc.max_pdu_size = cpu_to_le16(size); + + l2cap_txwin_setup(chan); + + rfc.txwin_size = min_t(u16, chan->tx_win, + L2CAP_DEFAULT_TX_WINDOW); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); + if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) + l2cap_add_opt_efs(&ptr, chan); + if (!(chan->conn->feat_mask & L2CAP_FEAT_FCS)) break; @@ -1954,6 +2125,10 @@ done: chan->fcs = L2CAP_FCS_NONE; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, chan->fcs); } + + if (test_bit(FLAG_EXT_CTRL, &chan->flags)) + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, + chan->tx_win); break; case L2CAP_MODE_STREAMING: @@ -1962,13 +2137,19 @@ done: rfc.max_transmit = 0; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; - rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); - if (L2CAP_DEFAULT_MAX_PDU_SIZE > chan->conn->mtu - 10) - rfc.max_pdu_size = cpu_to_le16(chan->conn->mtu - 10); + + size = min_t(u16, L2CAP_DEFAULT_MAX_PDU_SIZE, chan->conn->mtu - + L2CAP_EXT_HDR_SIZE - + L2CAP_SDULEN_SIZE - + L2CAP_FCS_SIZE); + rfc.max_pdu_size = cpu_to_le16(size); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); + if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) + l2cap_add_opt_efs(&ptr, chan); + if (!(chan->conn->feat_mask & L2CAP_FEAT_FCS)) break; @@ -1995,8 +2176,11 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data) int type, hint, olen; unsigned long val; struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; + struct l2cap_conf_efs efs; + u8 remote_efs = 0; u16 mtu = L2CAP_DEFAULT_MTU; u16 result = L2CAP_CONF_SUCCESS; + u16 size; BT_DBG("chan %p", chan); @@ -2026,7 +2210,22 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data) case L2CAP_CONF_FCS: if (val == L2CAP_FCS_NONE) set_bit(CONF_NO_FCS_RECV, &chan->conf_state); + break; + case L2CAP_CONF_EFS: + remote_efs = 1; + if (olen == sizeof(efs)) + memcpy(&efs, (void *) val, olen); + break; + + case L2CAP_CONF_EWS: + if (!enable_hs) + return -ECONNREFUSED; + + set_bit(FLAG_EXT_CTRL, &chan->flags); + set_bit(CONF_EWS_RECV, &chan->conf_state); + chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; + chan->remote_tx_win = val; break; default: @@ -2051,6 +2250,13 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data) break; } + if (remote_efs) { + if (__l2cap_efs_supported(chan)) + set_bit(FLAG_EFS_ENABLE, &chan->flags); + else + return -ECONNREFUSED; + } + if (chan->mode != rfc.mode) return -ECONNREFUSED; @@ -2069,7 +2275,6 @@ done: sizeof(rfc), (unsigned long) &rfc); } - if (result == L2CAP_CONF_SUCCESS) { /* Configure output options and let the other side know * which ones we don't like. */ @@ -2082,6 +2287,26 @@ done: } l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu); + if (remote_efs) { + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) { + + result = L2CAP_CONF_UNACCEPT; + + if (chan->num_conf_req >= 1) + return -ECONNREFUSED; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, + sizeof(efs), + (unsigned long) &efs); + } else { + /* Send PENDING Conf Rsp */ + result = L2CAP_CONF_PENDING; + set_bit(CONF_LOC_CONF_PEND, &chan->conf_state); + } + } + switch (rfc.mode) { case L2CAP_MODE_BASIC: chan->fcs = L2CAP_FCS_NONE; @@ -2089,13 +2314,20 @@ done: break; case L2CAP_MODE_ERTM: - chan->remote_tx_win = rfc.txwin_size; - chan->remote_max_tx = rfc.max_transmit; + if (!test_bit(CONF_EWS_RECV, &chan->conf_state)) + chan->remote_tx_win = rfc.txwin_size; + else + rfc.txwin_size = L2CAP_DEFAULT_TX_WINDOW; - if (le16_to_cpu(rfc.max_pdu_size) > chan->conn->mtu - 10) - rfc.max_pdu_size = cpu_to_le16(chan->conn->mtu - 10); + chan->remote_max_tx = rfc.max_transmit; - chan->remote_mps = le16_to_cpu(rfc.max_pdu_size); + size = min_t(u16, le16_to_cpu(rfc.max_pdu_size), + chan->conn->mtu - + L2CAP_EXT_HDR_SIZE - + L2CAP_SDULEN_SIZE - + L2CAP_FCS_SIZE); + rfc.max_pdu_size = cpu_to_le16(size); + chan->remote_mps = size; rfc.retrans_timeout = le16_to_cpu(L2CAP_DEFAULT_RETRANS_TO); @@ -2107,13 +2339,29 @@ done: l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); + if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { + chan->remote_id = efs.id; + chan->remote_stype = efs.stype; + chan->remote_msdu = le16_to_cpu(efs.msdu); + chan->remote_flush_to = + le32_to_cpu(efs.flush_to); + chan->remote_acc_lat = + le32_to_cpu(efs.acc_lat); + chan->remote_sdu_itime = + le32_to_cpu(efs.sdu_itime); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, + sizeof(efs), (unsigned long) &efs); + } break; case L2CAP_MODE_STREAMING: - if (le16_to_cpu(rfc.max_pdu_size) > chan->conn->mtu - 10) - rfc.max_pdu_size = cpu_to_le16(chan->conn->mtu - 10); - - chan->remote_mps = le16_to_cpu(rfc.max_pdu_size); + size = min_t(u16, le16_to_cpu(rfc.max_pdu_size), + chan->conn->mtu - + L2CAP_EXT_HDR_SIZE - + L2CAP_SDULEN_SIZE - + L2CAP_FCS_SIZE); + rfc.max_pdu_size = cpu_to_le16(size); + chan->remote_mps = size; set_bit(CONF_MODE_DONE, &chan->conf_state); @@ -2145,7 +2393,8 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi void *ptr = req->data; int type, olen; unsigned long val; - struct l2cap_conf_rfc rfc; + struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; + struct l2cap_conf_efs efs; BT_DBG("chan %p, rsp %p, len %d, req %p", chan, rsp, len, data); @@ -2181,6 +2430,26 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); break; + + case L2CAP_CONF_EWS: + chan->tx_win = min_t(u16, val, + L2CAP_DEFAULT_EXT_WINDOW); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, + chan->tx_win); + break; + + case L2CAP_CONF_EFS: + if (olen == sizeof(efs)) + memcpy(&efs, (void *)val, olen); + + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) + return -ECONNREFUSED; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, + sizeof(efs), (unsigned long) &efs); + break; } } @@ -2189,13 +2458,23 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi chan->mode = rfc.mode; - if (*result == L2CAP_CONF_SUCCESS) { + if (*result == L2CAP_CONF_SUCCESS || *result == L2CAP_CONF_PENDING) { switch (rfc.mode) { case L2CAP_MODE_ERTM: chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); chan->mps = le16_to_cpu(rfc.max_pdu_size); + + if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { + chan->local_msdu = le16_to_cpu(efs.msdu); + chan->local_sdu_itime = + le32_to_cpu(efs.sdu_itime); + chan->local_acc_lat = le32_to_cpu(efs.acc_lat); + chan->local_flush_to = + le32_to_cpu(efs.flush_to); + } break; + case L2CAP_MODE_STREAMING: chan->mps = le16_to_cpu(rfc.max_pdu_size); } @@ -2264,6 +2543,16 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) } } + /* Use sane default values in case a misbehaving remote device + * did not send an RFC option. + */ + rfc.mode = chan->mode; + rfc.retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO); + rfc.monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO); + rfc.max_pdu_size = cpu_to_le16(chan->imtu); + + BT_ERR("Expected RFC option was not found, using defaults"); + done: switch (rfc.mode) { case L2CAP_MODE_ERTM: @@ -2285,7 +2574,7 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && cmd->ident == conn->info_ident) { - del_timer(&conn->info_timer); + __cancel_delayed_work(&conn->info_timer); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; @@ -2318,12 +2607,12 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd parent = pchan->sk; - bh_lock_sock(parent); + lock_sock(parent); /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(0x0001) && !hci_conn_check_link_mode(conn->hcon)) { - conn->disc_reason = 0x05; + conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; goto response; } @@ -2342,11 +2631,8 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd sk = chan->sk; - write_lock_bh(&conn->chan_lock); - /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { - write_unlock_bh(&conn->chan_lock); sock_set_flag(sk, SOCK_ZAPPED); chan->ops->close(chan->data); goto response; @@ -2361,7 +2647,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd bt_accept_enqueue(parent, sk); - __l2cap_chan_add(conn, chan); + l2cap_chan_add(conn, chan); dcid = chan->scid; @@ -2370,7 +2656,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd chan->ident = cmd->ident; if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) { - if (l2cap_check_security(chan)) { + if (l2cap_chan_check_security(chan)) { if (bt_sk(sk)->defer_setup) { l2cap_state_change(chan, BT_CONNECT2); result = L2CAP_CR_PEND; @@ -2392,10 +2678,8 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd status = L2CAP_CS_NO_INFO; } - write_unlock_bh(&conn->chan_lock); - response: - bh_unlock_sock(parent); + release_sock(parent); sendresp: rsp.scid = cpu_to_le16(scid); @@ -2411,7 +2695,7 @@ sendresp: conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; conn->info_ident = l2cap_get_ident(conn); - mod_timer(&conn->info_timer, jiffies + + schedule_delayed_work(&conn->info_timer, msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); l2cap_send_cmd(conn, conn->info_ident, @@ -2477,19 +2761,11 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd break; default: - /* don't delete l2cap channel if sk is owned by user */ - if (sock_owned_by_user(sk)) { - l2cap_state_change(chan, BT_DISCONN); - __clear_chan_timer(chan); - __set_chan_timer(chan, HZ / 5); - break; - } - l2cap_chan_del(chan, ECONNREFUSED); break; } - bh_unlock_sock(sk); + release_sock(sk); return 0; } @@ -2595,8 +2871,23 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr chan->num_conf_req++; } + /* Got Conf Rsp PENDING from remote side and asume we sent + Conf Rsp PENDING in the code above */ + if (test_bit(CONF_REM_CONF_PEND, &chan->conf_state) && + test_bit(CONF_LOC_CONF_PEND, &chan->conf_state)) { + + /* check compatibility */ + + clear_bit(CONF_LOC_CONF_PEND, &chan->conf_state); + set_bit(CONF_OUTPUT_DONE, &chan->conf_state); + + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, + l2cap_build_conf_rsp(chan, rsp, + L2CAP_CONF_SUCCESS, 0x0000), rsp); + } + unlock: - bh_unlock_sock(sk); + release_sock(sk); return 0; } @@ -2624,8 +2915,33 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr switch (result) { case L2CAP_CONF_SUCCESS: l2cap_conf_rfc_get(chan, rsp->data, len); + clear_bit(CONF_REM_CONF_PEND, &chan->conf_state); break; + case L2CAP_CONF_PENDING: + set_bit(CONF_REM_CONF_PEND, &chan->conf_state); + + if (test_bit(CONF_LOC_CONF_PEND, &chan->conf_state)) { + char buf[64]; + + len = l2cap_parse_conf_rsp(chan, rsp->data, len, + buf, &result); + if (len < 0) { + l2cap_send_disconn_req(conn, chan, ECONNRESET); + goto done; + } + + /* check compatibility */ + + clear_bit(CONF_LOC_CONF_PEND, &chan->conf_state); + set_bit(CONF_OUTPUT_DONE, &chan->conf_state); + + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, + l2cap_build_conf_rsp(chan, buf, + L2CAP_CONF_SUCCESS, 0x0000), buf); + } + goto done; + case L2CAP_CONF_UNACCEPT: if (chan->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) { char req[64]; @@ -2654,7 +2970,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr default: sk->sk_err = ECONNRESET; - __set_chan_timer(chan, HZ * 5); + __set_chan_timer(chan, L2CAP_DISC_REJ_TIMEOUT); l2cap_send_disconn_req(conn, chan, ECONNRESET); goto done; } @@ -2678,7 +2994,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr } done: - bh_unlock_sock(sk); + release_sock(sk); return 0; } @@ -2707,17 +3023,8 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd sk->sk_shutdown = SHUTDOWN_MASK; - /* don't delete l2cap channel if sk is owned by user */ - if (sock_owned_by_user(sk)) { - l2cap_state_change(chan, BT_DISCONN); - __clear_chan_timer(chan); - __set_chan_timer(chan, HZ / 5); - bh_unlock_sock(sk); - return 0; - } - l2cap_chan_del(chan, ECONNRESET); - bh_unlock_sock(sk); + release_sock(sk); chan->ops->close(chan->data); return 0; @@ -2741,17 +3048,8 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd sk = chan->sk; - /* don't delete l2cap channel if sk is owned by user */ - if (sock_owned_by_user(sk)) { - l2cap_state_change(chan,BT_DISCONN); - __clear_chan_timer(chan); - __set_chan_timer(chan, HZ / 5); - bh_unlock_sock(sk); - return 0; - } - l2cap_chan_del(chan, 0); - bh_unlock_sock(sk); + release_sock(sk); chan->ops->close(chan->data); return 0; @@ -2775,15 +3073,25 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm if (!disable_ertm) feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING | L2CAP_FEAT_FCS; + if (enable_hs) + feat_mask |= L2CAP_FEAT_EXT_FLOW + | L2CAP_FEAT_EXT_WINDOW; + put_unaligned_le32(feat_mask, rsp->data); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf), buf); } else if (type == L2CAP_IT_FIXED_CHAN) { u8 buf[12]; struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; + + if (enable_hs) + l2cap_fixed_chan[0] |= L2CAP_FC_A2MP; + else + l2cap_fixed_chan[0] &= ~L2CAP_FC_A2MP; + rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); - memcpy(buf + 4, l2cap_fixed_chan, 8); + memcpy(rsp->data, l2cap_fixed_chan, sizeof(l2cap_fixed_chan)); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf), buf); } else { @@ -2812,7 +3120,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) return 0; - del_timer(&conn->info_timer); + __cancel_delayed_work(&conn->info_timer); if (result != L2CAP_IR_SUCCESS) { conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; @@ -2850,6 +3158,165 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm return 0; } +static inline int l2cap_create_channel_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, + void *data) +{ + struct l2cap_create_chan_req *req = data; + struct l2cap_create_chan_rsp rsp; + u16 psm, scid; + + if (cmd_len != sizeof(*req)) + return -EPROTO; + + if (!enable_hs) + return -EINVAL; + + psm = le16_to_cpu(req->psm); + scid = le16_to_cpu(req->scid); + + BT_DBG("psm %d, scid %d, amp_id %d", psm, scid, req->amp_id); + + /* Placeholder: Always reject */ + rsp.dcid = 0; + rsp.scid = cpu_to_le16(scid); + rsp.result = L2CAP_CR_NO_MEM; + rsp.status = L2CAP_CS_NO_INFO; + + l2cap_send_cmd(conn, cmd->ident, L2CAP_CREATE_CHAN_RSP, + sizeof(rsp), &rsp); + + return 0; +} + +static inline int l2cap_create_channel_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, void *data) +{ + BT_DBG("conn %p", conn); + + return l2cap_connect_rsp(conn, cmd, data); +} + +static void l2cap_send_move_chan_rsp(struct l2cap_conn *conn, u8 ident, + u16 icid, u16 result) +{ + struct l2cap_move_chan_rsp rsp; + + BT_DBG("icid %d, result %d", icid, result); + + rsp.icid = cpu_to_le16(icid); + rsp.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, ident, L2CAP_MOVE_CHAN_RSP, sizeof(rsp), &rsp); +} + +static void l2cap_send_move_chan_cfm(struct l2cap_conn *conn, + struct l2cap_chan *chan, u16 icid, u16 result) +{ + struct l2cap_move_chan_cfm cfm; + u8 ident; + + BT_DBG("icid %d, result %d", icid, result); + + ident = l2cap_get_ident(conn); + if (chan) + chan->ident = ident; + + cfm.icid = cpu_to_le16(icid); + cfm.result = cpu_to_le16(result); + + l2cap_send_cmd(conn, ident, L2CAP_MOVE_CHAN_CFM, sizeof(cfm), &cfm); +} + +static void l2cap_send_move_chan_cfm_rsp(struct l2cap_conn *conn, u8 ident, + u16 icid) +{ + struct l2cap_move_chan_cfm_rsp rsp; + + BT_DBG("icid %d", icid); + + rsp.icid = cpu_to_le16(icid); + l2cap_send_cmd(conn, ident, L2CAP_MOVE_CHAN_CFM_RSP, sizeof(rsp), &rsp); +} + +static inline int l2cap_move_channel_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, void *data) +{ + struct l2cap_move_chan_req *req = data; + u16 icid = 0; + u16 result = L2CAP_MR_NOT_ALLOWED; + + if (cmd_len != sizeof(*req)) + return -EPROTO; + + icid = le16_to_cpu(req->icid); + + BT_DBG("icid %d, dest_amp_id %d", icid, req->dest_amp_id); + + if (!enable_hs) + return -EINVAL; + + /* Placeholder: Always refuse */ + l2cap_send_move_chan_rsp(conn, cmd->ident, icid, result); + + return 0; +} + +static inline int l2cap_move_channel_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, void *data) +{ + struct l2cap_move_chan_rsp *rsp = data; + u16 icid, result; + + if (cmd_len != sizeof(*rsp)) + return -EPROTO; + + icid = le16_to_cpu(rsp->icid); + result = le16_to_cpu(rsp->result); + + BT_DBG("icid %d, result %d", icid, result); + + /* Placeholder: Always unconfirmed */ + l2cap_send_move_chan_cfm(conn, NULL, icid, L2CAP_MC_UNCONFIRMED); + + return 0; +} + +static inline int l2cap_move_channel_confirm(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, void *data) +{ + struct l2cap_move_chan_cfm *cfm = data; + u16 icid, result; + + if (cmd_len != sizeof(*cfm)) + return -EPROTO; + + icid = le16_to_cpu(cfm->icid); + result = le16_to_cpu(cfm->result); + + BT_DBG("icid %d, result %d", icid, result); + + l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); + + return 0; +} + +static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, void *data) +{ + struct l2cap_move_chan_cfm_rsp *rsp = data; + u16 icid; + + if (cmd_len != sizeof(*rsp)) + return -EPROTO; + + icid = le16_to_cpu(rsp->icid); + + BT_DBG("icid %d", icid); + + return 0; +} + static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency, u16 to_multiplier) { @@ -2962,6 +3429,30 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, err = l2cap_information_rsp(conn, cmd, data); break; + case L2CAP_CREATE_CHAN_REQ: + err = l2cap_create_channel_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_CREATE_CHAN_RSP: + err = l2cap_create_channel_rsp(conn, cmd, data); + break; + + case L2CAP_MOVE_CHAN_REQ: + err = l2cap_move_channel_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_MOVE_CHAN_RSP: + err = l2cap_move_channel_rsp(conn, cmd, cmd_len, data); + break; + + case L2CAP_MOVE_CHAN_CFM: + err = l2cap_move_channel_confirm(conn, cmd, cmd_len, data); + break; + + case L2CAP_MOVE_CHAN_CFM_RSP: + err = l2cap_move_channel_confirm_rsp(conn, cmd, cmd_len, data); + break; + default: BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code); err = -EINVAL; @@ -3040,10 +3531,15 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, static int l2cap_check_fcs(struct l2cap_chan *chan, struct sk_buff *skb) { u16 our_fcs, rcv_fcs; - int hdr_size = L2CAP_HDR_SIZE + 2; + int hdr_size; + + if (test_bit(FLAG_EXT_CTRL, &chan->flags)) + hdr_size = L2CAP_EXT_HDR_SIZE; + else + hdr_size = L2CAP_ENH_HDR_SIZE; if (chan->fcs == L2CAP_FCS_CRC16) { - skb_trim(skb, skb->len - 2); + skb_trim(skb, skb->len - L2CAP_FCS_SIZE); rcv_fcs = get_unaligned_le16(skb->data + skb->len); our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size); @@ -3055,14 +3551,14 @@ static int l2cap_check_fcs(struct l2cap_chan *chan, struct sk_buff *skb) static inline void l2cap_send_i_or_rr_or_rnr(struct l2cap_chan *chan) { - u16 control = 0; + u32 control = 0; chan->frames_sent = 0; - control |= chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control |= __set_reqseq(chan, chan->buffer_seq); if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { - control |= L2CAP_SUPER_RCV_NOT_READY; + control |= __set_ctrl_super(chan, L2CAP_SUPER_RNR); l2cap_send_sframe(chan, control); set_bit(CONN_RNR_SENT, &chan->conn_state); } @@ -3074,12 +3570,12 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct l2cap_chan *chan) if (!test_bit(CONN_LOCAL_BUSY, &chan->conn_state) && chan->frames_sent == 0) { - control |= L2CAP_SUPER_RCV_READY; + control |= __set_ctrl_super(chan, L2CAP_SUPER_RR); l2cap_send_sframe(chan, control); } } -static int l2cap_add_to_srej_queue(struct l2cap_chan *chan, struct sk_buff *skb, u8 tx_seq, u8 sar) +static int l2cap_add_to_srej_queue(struct l2cap_chan *chan, struct sk_buff *skb, u16 tx_seq, u8 sar) { struct sk_buff *next_skb; int tx_seq_offset, next_tx_seq_offset; @@ -3088,23 +3584,15 @@ static int l2cap_add_to_srej_queue(struct l2cap_chan *chan, struct sk_buff *skb, bt_cb(skb)->sar = sar; next_skb = skb_peek(&chan->srej_q); - if (!next_skb) { - __skb_queue_tail(&chan->srej_q, skb); - return 0; - } - tx_seq_offset = (tx_seq - chan->buffer_seq) % 64; - if (tx_seq_offset < 0) - tx_seq_offset += 64; + tx_seq_offset = __seq_offset(chan, tx_seq, chan->buffer_seq); - do { + while (next_skb) { if (bt_cb(next_skb)->tx_seq == tx_seq) return -EINVAL; - next_tx_seq_offset = (bt_cb(next_skb)->tx_seq - - chan->buffer_seq) % 64; - if (next_tx_seq_offset < 0) - next_tx_seq_offset += 64; + next_tx_seq_offset = __seq_offset(chan, + bt_cb(next_skb)->tx_seq, chan->buffer_seq); if (next_tx_seq_offset > tx_seq_offset) { __skb_queue_before(&chan->srej_q, next_skb, skb); @@ -3112,123 +3600,126 @@ static int l2cap_add_to_srej_queue(struct l2cap_chan *chan, struct sk_buff *skb, } if (skb_queue_is_last(&chan->srej_q, next_skb)) - break; - - } while ((next_skb = skb_queue_next(&chan->srej_q, next_skb))); + next_skb = NULL; + else + next_skb = skb_queue_next(&chan->srej_q, next_skb); + } __skb_queue_tail(&chan->srej_q, skb); return 0; } -static int l2cap_ertm_reassembly_sdu(struct l2cap_chan *chan, struct sk_buff *skb, u16 control) +static void append_skb_frag(struct sk_buff *skb, + struct sk_buff *new_frag, struct sk_buff **last_frag) { - struct sk_buff *_skb; - int err; - - switch (control & L2CAP_CTRL_SAR) { - case L2CAP_SDU_UNSEGMENTED: - if (test_bit(CONN_SAR_SDU, &chan->conn_state)) - goto drop; - - return chan->ops->recv(chan->data, skb); + /* skb->len reflects data in skb as well as all fragments + * skb->data_len reflects only data in fragments + */ + if (!skb_has_frag_list(skb)) + skb_shinfo(skb)->frag_list = new_frag; - case L2CAP_SDU_START: - if (test_bit(CONN_SAR_SDU, &chan->conn_state)) - goto drop; + new_frag->next = NULL; - chan->sdu_len = get_unaligned_le16(skb->data); + (*last_frag)->next = new_frag; + *last_frag = new_frag; - if (chan->sdu_len > chan->imtu) - goto disconnect; - - chan->sdu = bt_skb_alloc(chan->sdu_len, GFP_ATOMIC); - if (!chan->sdu) - return -ENOMEM; + skb->len += new_frag->len; + skb->data_len += new_frag->len; + skb->truesize += new_frag->truesize; +} - /* pull sdu_len bytes only after alloc, because of Local Busy - * condition we have to be sure that this will be executed - * only once, i.e., when alloc does not fail */ - skb_pull(skb, 2); +static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb, u32 control) +{ + int err = -EINVAL; - memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len); + switch (__get_ctrl_sar(chan, control)) { + case L2CAP_SAR_UNSEGMENTED: + if (chan->sdu) + break; - set_bit(CONN_SAR_SDU, &chan->conn_state); - chan->partial_sdu_len = skb->len; + err = chan->ops->recv(chan->data, skb); break; - case L2CAP_SDU_CONTINUE: - if (!test_bit(CONN_SAR_SDU, &chan->conn_state)) - goto disconnect; + case L2CAP_SAR_START: + if (chan->sdu) + break; - if (!chan->sdu) - goto disconnect; + chan->sdu_len = get_unaligned_le16(skb->data); + skb_pull(skb, L2CAP_SDULEN_SIZE); - chan->partial_sdu_len += skb->len; - if (chan->partial_sdu_len > chan->sdu_len) - goto drop; + if (chan->sdu_len > chan->imtu) { + err = -EMSGSIZE; + break; + } - memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len); + if (skb->len >= chan->sdu_len) + break; - break; + chan->sdu = skb; + chan->sdu_last_frag = skb; - case L2CAP_SDU_END: - if (!test_bit(CONN_SAR_SDU, &chan->conn_state)) - goto disconnect; + skb = NULL; + err = 0; + break; + case L2CAP_SAR_CONTINUE: if (!chan->sdu) - goto disconnect; + break; - chan->partial_sdu_len += skb->len; + append_skb_frag(chan->sdu, skb, + &chan->sdu_last_frag); + skb = NULL; - if (chan->partial_sdu_len > chan->imtu) - goto drop; + if (chan->sdu->len >= chan->sdu_len) + break; - if (chan->partial_sdu_len != chan->sdu_len) - goto drop; + err = 0; + break; - memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len); + case L2CAP_SAR_END: + if (!chan->sdu) + break; - _skb = skb_clone(chan->sdu, GFP_ATOMIC); - if (!_skb) { - return -ENOMEM; - } + append_skb_frag(chan->sdu, skb, + &chan->sdu_last_frag); + skb = NULL; - err = chan->ops->recv(chan->data, _skb); - if (err < 0) { - kfree_skb(_skb); - return err; - } + if (chan->sdu->len != chan->sdu_len) + break; - clear_bit(CONN_SAR_SDU, &chan->conn_state); + err = chan->ops->recv(chan->data, chan->sdu); - kfree_skb(chan->sdu); + if (!err) { + /* Reassembly complete */ + chan->sdu = NULL; + chan->sdu_last_frag = NULL; + chan->sdu_len = 0; + } break; } - kfree_skb(skb); - return 0; - -drop: - kfree_skb(chan->sdu); - chan->sdu = NULL; + if (err) { + kfree_skb(skb); + kfree_skb(chan->sdu); + chan->sdu = NULL; + chan->sdu_last_frag = NULL; + chan->sdu_len = 0; + } -disconnect: - l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); - kfree_skb(skb); - return 0; + return err; } static void l2cap_ertm_enter_local_busy(struct l2cap_chan *chan) { - u16 control; + u32 control; BT_DBG("chan %p, Enter local busy", chan); set_bit(CONN_LOCAL_BUSY, &chan->conn_state); - control = chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - control |= L2CAP_SUPER_RCV_NOT_READY; + control = __set_reqseq(chan, chan->buffer_seq); + control |= __set_ctrl_super(chan, L2CAP_SUPER_RNR); l2cap_send_sframe(chan, control); set_bit(CONN_RNR_SENT, &chan->conn_state); @@ -3238,13 +3729,14 @@ static void l2cap_ertm_enter_local_busy(struct l2cap_chan *chan) static void l2cap_ertm_exit_local_busy(struct l2cap_chan *chan) { - u16 control; + u32 control; if (!test_bit(CONN_RNR_SENT, &chan->conn_state)) goto done; - control = chan->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL; + control = __set_reqseq(chan, chan->buffer_seq); + control |= __set_ctrl_poll(chan); + control |= __set_ctrl_super(chan, L2CAP_SUPER_RR); l2cap_send_sframe(chan, control); chan->retry_count = 1; @@ -3270,103 +3762,10 @@ void l2cap_chan_busy(struct l2cap_chan *chan, int busy) } } -static int l2cap_streaming_reassembly_sdu(struct l2cap_chan *chan, struct sk_buff *skb, u16 control) -{ - struct sk_buff *_skb; - int err = -EINVAL; - - /* - * TODO: We have to notify the userland if some data is lost with the - * Streaming Mode. - */ - - switch (control & L2CAP_CTRL_SAR) { - case L2CAP_SDU_UNSEGMENTED: - if (test_bit(CONN_SAR_SDU, &chan->conn_state)) { - kfree_skb(chan->sdu); - break; - } - - err = chan->ops->recv(chan->data, skb); - if (!err) - return 0; - - break; - - case L2CAP_SDU_START: - if (test_bit(CONN_SAR_SDU, &chan->conn_state)) { - kfree_skb(chan->sdu); - break; - } - - chan->sdu_len = get_unaligned_le16(skb->data); - skb_pull(skb, 2); - - if (chan->sdu_len > chan->imtu) { - err = -EMSGSIZE; - break; - } - - chan->sdu = bt_skb_alloc(chan->sdu_len, GFP_ATOMIC); - if (!chan->sdu) { - err = -ENOMEM; - break; - } - - memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len); - - set_bit(CONN_SAR_SDU, &chan->conn_state); - chan->partial_sdu_len = skb->len; - err = 0; - break; - - case L2CAP_SDU_CONTINUE: - if (!test_bit(CONN_SAR_SDU, &chan->conn_state)) - break; - - memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len); - - chan->partial_sdu_len += skb->len; - if (chan->partial_sdu_len > chan->sdu_len) - kfree_skb(chan->sdu); - else - err = 0; - - break; - - case L2CAP_SDU_END: - if (!test_bit(CONN_SAR_SDU, &chan->conn_state)) - break; - - memcpy(skb_put(chan->sdu, skb->len), skb->data, skb->len); - - clear_bit(CONN_SAR_SDU, &chan->conn_state); - chan->partial_sdu_len += skb->len; - - if (chan->partial_sdu_len > chan->imtu) - goto drop; - - if (chan->partial_sdu_len == chan->sdu_len) { - _skb = skb_clone(chan->sdu, GFP_ATOMIC); - err = chan->ops->recv(chan->data, _skb); - if (err < 0) - kfree_skb(_skb); - } - err = 0; - -drop: - kfree_skb(chan->sdu); - break; - } - - kfree_skb(skb); - return err; -} - -static void l2cap_check_srej_gap(struct l2cap_chan *chan, u8 tx_seq) +static void l2cap_check_srej_gap(struct l2cap_chan *chan, u16 tx_seq) { struct sk_buff *skb; - u16 control; + u32 control; while ((skb = skb_peek(&chan->srej_q)) && !test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { @@ -3376,24 +3775,23 @@ static void l2cap_check_srej_gap(struct l2cap_chan *chan, u8 tx_seq) break; skb = skb_dequeue(&chan->srej_q); - control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; - err = l2cap_ertm_reassembly_sdu(chan, skb, control); + control = __set_ctrl_sar(chan, bt_cb(skb)->sar); + err = l2cap_reassemble_sdu(chan, skb, control); if (err < 0) { l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); break; } - chan->buffer_seq_srej = - (chan->buffer_seq_srej + 1) % 64; - tx_seq = (tx_seq + 1) % 64; + chan->buffer_seq_srej = __next_seq(chan, chan->buffer_seq_srej); + tx_seq = __next_seq(chan, tx_seq); } } -static void l2cap_resend_srejframe(struct l2cap_chan *chan, u8 tx_seq) +static void l2cap_resend_srejframe(struct l2cap_chan *chan, u16 tx_seq) { struct srej_list *l, *tmp; - u16 control; + u32 control; list_for_each_entry_safe(l, tmp, &chan->srej_l, list) { if (l->tx_seq == tx_seq) { @@ -3401,45 +3799,53 @@ static void l2cap_resend_srejframe(struct l2cap_chan *chan, u8 tx_seq) kfree(l); return; } - control = L2CAP_SUPER_SELECT_REJECT; - control |= l->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control = __set_ctrl_super(chan, L2CAP_SUPER_SREJ); + control |= __set_reqseq(chan, l->tx_seq); l2cap_send_sframe(chan, control); list_del(&l->list); list_add_tail(&l->list, &chan->srej_l); } } -static void l2cap_send_srejframe(struct l2cap_chan *chan, u8 tx_seq) +static int l2cap_send_srejframe(struct l2cap_chan *chan, u16 tx_seq) { struct srej_list *new; - u16 control; + u32 control; while (tx_seq != chan->expected_tx_seq) { - control = L2CAP_SUPER_SELECT_REJECT; - control |= chan->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control = __set_ctrl_super(chan, L2CAP_SUPER_SREJ); + control |= __set_reqseq(chan, chan->expected_tx_seq); l2cap_send_sframe(chan, control); new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); + if (!new) + return -ENOMEM; + new->tx_seq = chan->expected_tx_seq; - chan->expected_tx_seq = (chan->expected_tx_seq + 1) % 64; + + chan->expected_tx_seq = __next_seq(chan, chan->expected_tx_seq); + list_add_tail(&new->list, &chan->srej_l); } - chan->expected_tx_seq = (chan->expected_tx_seq + 1) % 64; + + chan->expected_tx_seq = __next_seq(chan, chan->expected_tx_seq); + + return 0; } -static inline int l2cap_data_channel_iframe(struct l2cap_chan *chan, u16 rx_control, struct sk_buff *skb) +static inline int l2cap_data_channel_iframe(struct l2cap_chan *chan, u32 rx_control, struct sk_buff *skb) { - u8 tx_seq = __get_txseq(rx_control); - u8 req_seq = __get_reqseq(rx_control); - u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; + u16 tx_seq = __get_txseq(chan, rx_control); + u16 req_seq = __get_reqseq(chan, rx_control); + u8 sar = __get_ctrl_sar(chan, rx_control); int tx_seq_offset, expected_tx_seq_offset; int num_to_ack = (chan->tx_win/6) + 1; int err = 0; - BT_DBG("chan %p len %d tx_seq %d rx_control 0x%4.4x", chan, skb->len, + BT_DBG("chan %p len %d tx_seq %d rx_control 0x%8.8x", chan, skb->len, tx_seq, rx_control); - if (L2CAP_CTRL_FINAL & rx_control && + if (__is_ctrl_final(chan, rx_control) && test_bit(CONN_WAIT_F, &chan->conn_state)) { __clear_monitor_timer(chan); if (chan->unacked_frames > 0) @@ -3450,9 +3856,7 @@ static inline int l2cap_data_channel_iframe(struct l2cap_chan *chan, u16 rx_cont chan->expected_ack_seq = req_seq; l2cap_drop_acked_frames(chan); - tx_seq_offset = (tx_seq - chan->buffer_seq) % 64; - if (tx_seq_offset < 0) - tx_seq_offset += 64; + tx_seq_offset = __seq_offset(chan, tx_seq, chan->buffer_seq); /* invalid tx_seq */ if (tx_seq_offset >= chan->tx_win) { @@ -3497,13 +3901,16 @@ static inline int l2cap_data_channel_iframe(struct l2cap_chan *chan, u16 rx_cont return 0; } } - l2cap_send_srejframe(chan, tx_seq); + + err = l2cap_send_srejframe(chan, tx_seq); + if (err < 0) { + l2cap_send_disconn_req(chan->conn, chan, -err); + return err; + } } } else { - expected_tx_seq_offset = - (chan->expected_tx_seq - chan->buffer_seq) % 64; - if (expected_tx_seq_offset < 0) - expected_tx_seq_offset += 64; + expected_tx_seq_offset = __seq_offset(chan, + chan->expected_tx_seq, chan->buffer_seq); /* duplicated tx_seq */ if (tx_seq_offset < expected_tx_seq_offset) @@ -3521,14 +3928,18 @@ static inline int l2cap_data_channel_iframe(struct l2cap_chan *chan, u16 rx_cont set_bit(CONN_SEND_PBIT, &chan->conn_state); - l2cap_send_srejframe(chan, tx_seq); + err = l2cap_send_srejframe(chan, tx_seq); + if (err < 0) { + l2cap_send_disconn_req(chan->conn, chan, -err); + return err; + } __clear_ack_timer(chan); } return 0; expected: - chan->expected_tx_seq = (chan->expected_tx_seq + 1) % 64; + chan->expected_tx_seq = __next_seq(chan, chan->expected_tx_seq); if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) { bt_cb(skb)->tx_seq = tx_seq; @@ -3537,23 +3948,25 @@ expected: return 0; } - err = l2cap_ertm_reassembly_sdu(chan, skb, rx_control); - chan->buffer_seq = (chan->buffer_seq + 1) % 64; + err = l2cap_reassemble_sdu(chan, skb, rx_control); + chan->buffer_seq = __next_seq(chan, chan->buffer_seq); + if (err < 0) { l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); return err; } - if (rx_control & L2CAP_CTRL_FINAL) { + if (__is_ctrl_final(chan, rx_control)) { if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) l2cap_retransmit_frames(chan); } - __set_ack_timer(chan); chan->num_acked = (chan->num_acked + 1) % num_to_ack; if (chan->num_acked == num_to_ack - 1) l2cap_send_ack(chan); + else + __set_ack_timer(chan); return 0; @@ -3562,15 +3975,15 @@ drop: return 0; } -static inline void l2cap_data_channel_rrframe(struct l2cap_chan *chan, u16 rx_control) +static inline void l2cap_data_channel_rrframe(struct l2cap_chan *chan, u32 rx_control) { - BT_DBG("chan %p, req_seq %d ctrl 0x%4.4x", chan, __get_reqseq(rx_control), - rx_control); + BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, + __get_reqseq(chan, rx_control), rx_control); - chan->expected_ack_seq = __get_reqseq(rx_control); + chan->expected_ack_seq = __get_reqseq(chan, rx_control); l2cap_drop_acked_frames(chan); - if (rx_control & L2CAP_CTRL_POLL) { + if (__is_ctrl_poll(chan, rx_control)) { set_bit(CONN_SEND_FBIT, &chan->conn_state); if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) { if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state) && @@ -3583,7 +3996,7 @@ static inline void l2cap_data_channel_rrframe(struct l2cap_chan *chan, u16 rx_co l2cap_send_i_or_rr_or_rnr(chan); } - } else if (rx_control & L2CAP_CTRL_FINAL) { + } else if (__is_ctrl_final(chan, rx_control)) { clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) @@ -3602,18 +4015,18 @@ static inline void l2cap_data_channel_rrframe(struct l2cap_chan *chan, u16 rx_co } } -static inline void l2cap_data_channel_rejframe(struct l2cap_chan *chan, u16 rx_control) +static inline void l2cap_data_channel_rejframe(struct l2cap_chan *chan, u32 rx_control) { - u8 tx_seq = __get_reqseq(rx_control); + u16 tx_seq = __get_reqseq(chan, rx_control); - BT_DBG("chan %p, req_seq %d ctrl 0x%4.4x", chan, tx_seq, rx_control); + BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, tx_seq, rx_control); clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); chan->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(chan); - if (rx_control & L2CAP_CTRL_FINAL) { + if (__is_ctrl_final(chan, rx_control)) { if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) l2cap_retransmit_frames(chan); } else { @@ -3623,15 +4036,15 @@ static inline void l2cap_data_channel_rejframe(struct l2cap_chan *chan, u16 rx_c set_bit(CONN_REJ_ACT, &chan->conn_state); } } -static inline void l2cap_data_channel_srejframe(struct l2cap_chan *chan, u16 rx_control) +static inline void l2cap_data_channel_srejframe(struct l2cap_chan *chan, u32 rx_control) { - u8 tx_seq = __get_reqseq(rx_control); + u16 tx_seq = __get_reqseq(chan, rx_control); - BT_DBG("chan %p, req_seq %d ctrl 0x%4.4x", chan, tx_seq, rx_control); + BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, tx_seq, rx_control); clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - if (rx_control & L2CAP_CTRL_POLL) { + if (__is_ctrl_poll(chan, rx_control)) { chan->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(chan); @@ -3644,7 +4057,7 @@ static inline void l2cap_data_channel_srejframe(struct l2cap_chan *chan, u16 rx_ chan->srej_save_reqseq = tx_seq; set_bit(CONN_SREJ_ACT, &chan->conn_state); } - } else if (rx_control & L2CAP_CTRL_FINAL) { + } else if (__is_ctrl_final(chan, rx_control)) { if (test_bit(CONN_SREJ_ACT, &chan->conn_state) && chan->srej_save_reqseq == tx_seq) clear_bit(CONN_SREJ_ACT, &chan->conn_state); @@ -3659,37 +4072,39 @@ static inline void l2cap_data_channel_srejframe(struct l2cap_chan *chan, u16 rx_ } } -static inline void l2cap_data_channel_rnrframe(struct l2cap_chan *chan, u16 rx_control) +static inline void l2cap_data_channel_rnrframe(struct l2cap_chan *chan, u32 rx_control) { - u8 tx_seq = __get_reqseq(rx_control); + u16 tx_seq = __get_reqseq(chan, rx_control); - BT_DBG("chan %p, req_seq %d ctrl 0x%4.4x", chan, tx_seq, rx_control); + BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, tx_seq, rx_control); set_bit(CONN_REMOTE_BUSY, &chan->conn_state); chan->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(chan); - if (rx_control & L2CAP_CTRL_POLL) + if (__is_ctrl_poll(chan, rx_control)) set_bit(CONN_SEND_FBIT, &chan->conn_state); if (!test_bit(CONN_SREJ_SENT, &chan->conn_state)) { __clear_retrans_timer(chan); - if (rx_control & L2CAP_CTRL_POLL) + if (__is_ctrl_poll(chan, rx_control)) l2cap_send_rr_or_rnr(chan, L2CAP_CTRL_FINAL); return; } - if (rx_control & L2CAP_CTRL_POLL) + if (__is_ctrl_poll(chan, rx_control)) { l2cap_send_srejtail(chan); - else - l2cap_send_sframe(chan, L2CAP_SUPER_RCV_READY); + } else { + rx_control = __set_ctrl_super(chan, L2CAP_SUPER_RR); + l2cap_send_sframe(chan, rx_control); + } } -static inline int l2cap_data_channel_sframe(struct l2cap_chan *chan, u16 rx_control, struct sk_buff *skb) +static inline int l2cap_data_channel_sframe(struct l2cap_chan *chan, u32 rx_control, struct sk_buff *skb) { - BT_DBG("chan %p rx_control 0x%4.4x len %d", chan, rx_control, skb->len); + BT_DBG("chan %p rx_control 0x%8.8x len %d", chan, rx_control, skb->len); - if (L2CAP_CTRL_FINAL & rx_control && + if (__is_ctrl_final(chan, rx_control) && test_bit(CONN_WAIT_F, &chan->conn_state)) { __clear_monitor_timer(chan); if (chan->unacked_frames > 0) @@ -3697,20 +4112,20 @@ static inline int l2cap_data_channel_sframe(struct l2cap_chan *chan, u16 rx_cont clear_bit(CONN_WAIT_F, &chan->conn_state); } - switch (rx_control & L2CAP_CTRL_SUPERVISE) { - case L2CAP_SUPER_RCV_READY: + switch (__get_ctrl_super(chan, rx_control)) { + case L2CAP_SUPER_RR: l2cap_data_channel_rrframe(chan, rx_control); break; - case L2CAP_SUPER_REJECT: + case L2CAP_SUPER_REJ: l2cap_data_channel_rejframe(chan, rx_control); break; - case L2CAP_SUPER_SELECT_REJECT: + case L2CAP_SUPER_SREJ: l2cap_data_channel_srejframe(chan, rx_control); break; - case L2CAP_SUPER_RCV_NOT_READY: + case L2CAP_SUPER_RNR: l2cap_data_channel_rnrframe(chan, rx_control); break; } @@ -3722,12 +4137,12 @@ static inline int l2cap_data_channel_sframe(struct l2cap_chan *chan, u16 rx_cont static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb) { struct l2cap_chan *chan = l2cap_pi(sk)->chan; - u16 control; - u8 req_seq; + u32 control; + u16 req_seq; int len, next_tx_seq_offset, req_seq_offset; - control = get_unaligned_le16(skb->data); - skb_pull(skb, 2); + control = __get_control(chan, skb->data); + skb_pull(skb, __ctrl_size(chan)); len = skb->len; /* @@ -3738,26 +4153,23 @@ static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb) if (l2cap_check_fcs(chan, skb)) goto drop; - if (__is_sar_start(control) && __is_iframe(control)) - len -= 2; + if (__is_sar_start(chan, control) && !__is_sframe(chan, control)) + len -= L2CAP_SDULEN_SIZE; if (chan->fcs == L2CAP_FCS_CRC16) - len -= 2; + len -= L2CAP_FCS_SIZE; if (len > chan->mps) { l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); goto drop; } - req_seq = __get_reqseq(control); - req_seq_offset = (req_seq - chan->expected_ack_seq) % 64; - if (req_seq_offset < 0) - req_seq_offset += 64; + req_seq = __get_reqseq(chan, control); - next_tx_seq_offset = - (chan->next_tx_seq - chan->expected_ack_seq) % 64; - if (next_tx_seq_offset < 0) - next_tx_seq_offset += 64; + req_seq_offset = __seq_offset(chan, req_seq, chan->expected_ack_seq); + + next_tx_seq_offset = __seq_offset(chan, chan->next_tx_seq, + chan->expected_ack_seq); /* check for invalid req-seq */ if (req_seq_offset > next_tx_seq_offset) { @@ -3765,7 +4177,7 @@ static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb) goto drop; } - if (__is_iframe(control)) { + if (!__is_sframe(chan, control)) { if (len < 0) { l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); goto drop; @@ -3793,8 +4205,8 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk { struct l2cap_chan *chan; struct sock *sk = NULL; - u16 control; - u8 tx_seq; + u32 control; + u16 tx_seq; int len; chan = l2cap_get_chan_by_scid(conn, cid); @@ -3825,40 +4237,43 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk break; case L2CAP_MODE_ERTM: - if (!sock_owned_by_user(sk)) { - l2cap_ertm_data_rcv(sk, skb); - } else { - if (sk_add_backlog(sk, skb)) - goto drop; - } + l2cap_ertm_data_rcv(sk, skb); goto done; case L2CAP_MODE_STREAMING: - control = get_unaligned_le16(skb->data); - skb_pull(skb, 2); + control = __get_control(chan, skb->data); + skb_pull(skb, __ctrl_size(chan)); len = skb->len; if (l2cap_check_fcs(chan, skb)) goto drop; - if (__is_sar_start(control)) - len -= 2; + if (__is_sar_start(chan, control)) + len -= L2CAP_SDULEN_SIZE; if (chan->fcs == L2CAP_FCS_CRC16) - len -= 2; + len -= L2CAP_FCS_SIZE; - if (len > chan->mps || len < 0 || __is_sframe(control)) + if (len > chan->mps || len < 0 || __is_sframe(chan, control)) goto drop; - tx_seq = __get_txseq(control); + tx_seq = __get_txseq(chan, control); - if (chan->expected_tx_seq == tx_seq) - chan->expected_tx_seq = (chan->expected_tx_seq + 1) % 64; - else - chan->expected_tx_seq = (tx_seq + 1) % 64; + if (chan->expected_tx_seq != tx_seq) { + /* Frame(s) missing - must discard partial SDU */ + kfree_skb(chan->sdu); + chan->sdu = NULL; + chan->sdu_last_frag = NULL; + chan->sdu_len = 0; + + /* TODO: Notify userland of missing data */ + } + + chan->expected_tx_seq = __next_seq(chan, tx_seq); - l2cap_streaming_reassembly_sdu(chan, skb, control); + if (l2cap_reassemble_sdu(chan, skb, control) == -EMSGSIZE) + l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); goto done; @@ -3872,7 +4287,7 @@ drop: done: if (sk) - bh_unlock_sock(sk); + release_sock(sk); return 0; } @@ -3888,7 +4303,7 @@ static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, str sk = chan->sk; - bh_lock_sock(sk); + lock_sock(sk); BT_DBG("sk %p, len %d", sk, skb->len); @@ -3906,7 +4321,7 @@ drop: done: if (sk) - bh_unlock_sock(sk); + release_sock(sk); return 0; } @@ -3921,7 +4336,7 @@ static inline int l2cap_att_channel(struct l2cap_conn *conn, __le16 cid, struct sk = chan->sk; - bh_lock_sock(sk); + lock_sock(sk); BT_DBG("sk %p, len %d", sk, skb->len); @@ -3939,7 +4354,7 @@ drop: done: if (sk) - bh_unlock_sock(sk); + release_sock(sk); return 0; } @@ -3989,14 +4404,11 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) /* ---- L2CAP interface with lower layer (HCI) ---- */ -static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) +int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) { int exact = 0, lm1 = 0, lm2 = 0; struct l2cap_chan *c; - if (type != ACL_LINK) - return -EINVAL; - BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); /* Find listening sockets and check their link_mode */ @@ -4009,12 +4421,12 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) { lm1 |= HCI_LM_ACCEPT; - if (c->role_switch) + if (test_bit(FLAG_ROLE_SWITCH, &c->flags)) lm1 |= HCI_LM_MASTER; exact++; } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) { lm2 |= HCI_LM_ACCEPT; - if (c->role_switch) + if (test_bit(FLAG_ROLE_SWITCH, &c->flags)) lm2 |= HCI_LM_MASTER; } } @@ -4023,15 +4435,12 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) return exact ? lm1 : lm2; } -static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) +int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_conn *conn; BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); - if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK)) - return -EINVAL; - if (!status) { conn = l2cap_conn_add(hcon, status); if (conn) @@ -4042,27 +4451,22 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) return 0; } -static int l2cap_disconn_ind(struct hci_conn *hcon) +int l2cap_disconn_ind(struct hci_conn *hcon) { struct l2cap_conn *conn = hcon->l2cap_data; BT_DBG("hcon %p", hcon); - if ((hcon->type != ACL_LINK && hcon->type != LE_LINK) || !conn) - return 0x13; - + if (!conn) + return HCI_ERROR_REMOTE_USER_TERM; return conn->disc_reason; } -static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) +int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); - if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK)) - return -EINVAL; - l2cap_conn_del(hcon, bt_to_errno(reason)); - return 0; } @@ -4074,7 +4478,7 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) if (encrypt == 0x00) { if (chan->sec_level == BT_SECURITY_MEDIUM) { __clear_chan_timer(chan); - __set_chan_timer(chan, HZ * 5); + __set_chan_timer(chan, L2CAP_ENC_TIMEOUT); } else if (chan->sec_level == BT_SECURITY_HIGH) l2cap_chan_close(chan, ECONNREFUSED); } else { @@ -4083,7 +4487,7 @@ static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) } } -static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) +int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) { struct l2cap_conn *conn = hcon->l2cap_data; struct l2cap_chan *chan; @@ -4093,9 +4497,14 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) BT_DBG("conn %p", conn); - read_lock(&conn->chan_lock); + if (hcon->type == LE_LINK) { + smp_distribute_keys(conn, 0); + __cancel_delayed_work(&conn->security_timer); + } + + rcu_read_lock(); - list_for_each_entry(chan, &conn->chan_l, list) { + list_for_each_entry_rcu(chan, &conn->chan_l, list) { struct sock *sk = chan->sk; bh_lock_sock(sk); @@ -4105,9 +4514,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) if (chan->scid == L2CAP_CID_LE_DATA) { if (!status && encrypt) { chan->sec_level = hcon->sec_level; - del_timer(&conn->security_timer); l2cap_chan_ready(sk); - smp_distribute_keys(conn, 0); } bh_unlock_sock(sk); @@ -4139,7 +4546,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) L2CAP_CONN_REQ, sizeof(req), &req); } else { __clear_chan_timer(chan); - __set_chan_timer(chan, HZ / 10); + __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); } } else if (chan->state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; @@ -4159,7 +4566,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } } else { l2cap_state_change(chan, BT_DISCONN); - __set_chan_timer(chan, HZ / 10); + __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); res = L2CAP_CR_SEC_BLOCK; stat = L2CAP_CS_NO_INFO; } @@ -4175,12 +4582,12 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) bh_unlock_sock(sk); } - read_unlock(&conn->chan_lock); + rcu_read_unlock(); return 0; } -static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) +int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { struct l2cap_conn *conn = hcon->l2cap_data; @@ -4241,11 +4648,11 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl BT_ERR("Frame exceeding recv MTU (len %d, " "MTU %d)", len, chan->imtu); - bh_unlock_sock(sk); + release_sock(sk); l2cap_conn_unreliable(conn, ECOMM); goto drop; } - bh_unlock_sock(sk); + release_sock(sk); } /* Allocate skb for the complete frame (with header) */ @@ -4295,7 +4702,7 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) { struct l2cap_chan *c; - read_lock_bh(&chan_list_lock); + read_lock(&chan_list_lock); list_for_each_entry(c, &chan_list, global_l) { struct sock *sk = c->sk; @@ -4308,7 +4715,7 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) c->sec_level, c->mode); } - read_unlock_bh(&chan_list_lock); + read_unlock(&chan_list_lock); return 0; } @@ -4327,17 +4734,6 @@ static const struct file_operations l2cap_debugfs_fops = { static struct dentry *l2cap_debugfs; -static struct hci_proto l2cap_hci_proto = { - .name = "L2CAP", - .id = HCI_PROTO_L2CAP, - .connect_ind = l2cap_connect_ind, - .connect_cfm = l2cap_connect_cfm, - .disconn_ind = l2cap_disconn_ind, - .disconn_cfm = l2cap_disconn_cfm, - .security_cfm = l2cap_security_cfm, - .recv_acldata = l2cap_recv_acldata -}; - int __init l2cap_init(void) { int err; @@ -4346,13 +4742,6 @@ int __init l2cap_init(void) if (err < 0) return err; - err = hci_register_proto(&l2cap_hci_proto); - if (err < 0) { - BT_ERR("L2CAP protocol registration failed"); - bt_sock_unregister(BTPROTO_L2CAP); - goto error; - } - if (bt_debugfs) { l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs, NULL, &l2cap_debugfs_fops); @@ -4361,19 +4750,11 @@ int __init l2cap_init(void) } return 0; - -error: - l2cap_cleanup_sockets(); - return err; } void l2cap_exit(void) { debugfs_remove(l2cap_debugfs); - - if (hci_unregister_proto(&l2cap_hci_proto) < 0) - BT_ERR("L2CAP protocol unregistration failed"); - l2cap_cleanup_sockets(); } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index e8292369cdcf..c61d967012b2 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -3,6 +3,7 @@ Copyright (C) 2000-2001 Qualcomm Incorporated Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org> Copyright (C) 2010 Google Inc. + Copyright (C) 2011 ProFUSION Embedded Systems Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -27,6 +28,7 @@ /* Bluetooth L2CAP sockets. */ #include <linux/security.h> +#include <linux/export.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -121,70 +123,15 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al if (la.l2_cid && la.l2_psm) return -EINVAL; - lock_sock(sk); - - if (chan->chan_type == L2CAP_CHAN_CONN_ORIENTED - && !(la.l2_psm || la.l2_cid)) { - err = -EINVAL; - goto done; - } - - switch (chan->mode) { - case L2CAP_MODE_BASIC: - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -ENOTSUPP; - goto done; - } - - switch (sk->sk_state) { - case BT_CONNECT: - case BT_CONNECT2: - case BT_CONFIG: - /* Already connecting */ - goto wait; - - case BT_CONNECTED: - /* Already connected */ - err = -EISCONN; - goto done; - - case BT_OPEN: - case BT_BOUND: - /* Can connect */ - break; - - default: - err = -EBADFD; - goto done; - } - - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 && !la.l2_cid && - chan->chan_type != L2CAP_CHAN_RAW) { - err = -EINVAL; - goto done; - } - - /* Set destination address and psm */ - bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); - chan->psm = la.l2_psm; - chan->dcid = la.l2_cid; - - err = l2cap_chan_connect(l2cap_pi(sk)->chan); + err = l2cap_chan_connect(chan, la.l2_psm, la.l2_cid, &la.l2_bdaddr); if (err) goto done; -wait: err = bt_sock_wait_state(sk, BT_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); done: - release_sock(sk); + if (sock_owned_by_user(sk)) + release_sock(sk); return err; } @@ -333,7 +280,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us opts.mode = chan->mode; opts.fcs = chan->fcs; opts.max_tx = chan->max_tx; - opts.txwin_size = (__u16)chan->tx_win; + opts.txwin_size = chan->tx_win; len = min_t(unsigned int, len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) @@ -358,10 +305,10 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us break; } - if (chan->role_switch) + if (test_bit(FLAG_ROLE_SWITCH, &chan->flags)) opt |= L2CAP_LM_MASTER; - if (chan->force_reliable) + if (test_bit(FLAG_FORCE_RELIABLE, &chan->flags)) opt |= L2CAP_LM_RELIABLE; if (put_user(opt, (u32 __user *) optval)) @@ -448,7 +395,8 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch break; case BT_FLUSHABLE: - if (put_user(chan->flushable, (u32 __user *) optval)) + if (put_user(test_bit(FLAG_FLUSHABLE, &chan->flags), + (u32 __user *) optval)) err = -EFAULT; break; @@ -460,7 +408,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch break; } - pwr.force_active = chan->force_active; + pwr.force_active = test_bit(FLAG_FORCE_ACTIVE, &chan->flags); len = min_t(unsigned int, len, sizeof(pwr)); if (copy_to_user(optval, (char *) &pwr, len)) @@ -468,6 +416,16 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch break; + case BT_CHANNEL_POLICY: + if (!enable_hs) { + err = -ENOPROTOOPT; + break; + } + + if (put_user(chan->chan_policy, (u32 __user *) optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; @@ -502,7 +460,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us opts.mode = chan->mode; opts.fcs = chan->fcs; opts.max_tx = chan->max_tx; - opts.txwin_size = (__u16)chan->tx_win; + opts.txwin_size = chan->tx_win; len = min_t(unsigned int, sizeof(opts), optlen); if (copy_from_user((char *) &opts, optval, len)) { @@ -510,7 +468,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us break; } - if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) { + if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) { err = -EINVAL; break; } @@ -534,7 +492,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us chan->omtu = opts.omtu; chan->fcs = opts.fcs; chan->max_tx = opts.max_tx; - chan->tx_win = (__u8)opts.txwin_size; + chan->tx_win = opts.txwin_size; break; case L2CAP_LM: @@ -550,8 +508,15 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us if (opt & L2CAP_LM_SECURE) chan->sec_level = BT_SECURITY_HIGH; - chan->role_switch = (opt & L2CAP_LM_MASTER); - chan->force_reliable = (opt & L2CAP_LM_RELIABLE); + if (opt & L2CAP_LM_MASTER) + set_bit(FLAG_ROLE_SWITCH, &chan->flags); + else + clear_bit(FLAG_ROLE_SWITCH, &chan->flags); + + if (opt & L2CAP_LM_RELIABLE) + set_bit(FLAG_FORCE_RELIABLE, &chan->flags); + else + clear_bit(FLAG_FORCE_RELIABLE, &chan->flags); break; default: @@ -607,8 +572,13 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch chan->sec_level = sec.level; + if (!chan->conn) + break; + conn = chan->conn; - if (conn && chan->scid == L2CAP_CID_LE_DATA) { + + /*change security for LE channels */ + if (chan->scid == L2CAP_CID_LE_DATA) { if (!conn->hcon->out) { err = -EINVAL; break; @@ -616,9 +586,15 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch if (smp_conn_security(conn, sec.level)) break; - - err = 0; sk->sk_state = BT_CONFIG; + chan->state = BT_CONFIG; + + /* or for ACL link, under defer_setup time */ + } else if (sk->sk_state == BT_CONNECT2 && + bt_sk(sk)->defer_setup) { + err = l2cap_chan_check_security(chan); + } else { + err = -EINVAL; } break; @@ -657,7 +633,10 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch } } - chan->flushable = opt; + if (opt) + set_bit(FLAG_FLUSHABLE, &chan->flags); + else + clear_bit(FLAG_FLUSHABLE, &chan->flags); break; case BT_POWER: @@ -674,7 +653,36 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch err = -EFAULT; break; } - chan->force_active = pwr.force_active; + + if (pwr.force_active) + set_bit(FLAG_FORCE_ACTIVE, &chan->flags); + else + clear_bit(FLAG_FORCE_ACTIVE, &chan->flags); + break; + + case BT_CHANNEL_POLICY: + if (!enable_hs) { + err = -ENOPROTOOPT; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > BT_CHANNEL_POLICY_AMP_PREFERRED) { + err = -EINVAL; + break; + } + + if (chan->mode != L2CAP_MODE_ERTM && + chan->mode != L2CAP_MODE_STREAMING) { + err = -EOPNOTSUPP; + break; + } + + chan->chan_policy = (u8) opt; break; default: @@ -708,7 +716,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms return -ENOTCONN; } - err = l2cap_chan_send(chan, msg, len); + err = l2cap_chan_send(chan, msg, len, sk->sk_priority); release_sock(sk); return err; @@ -724,6 +732,7 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { sk->sk_state = BT_CONFIG; + pi->chan->state = BT_CONFIG; __l2cap_connect_rsp_defer(pi->chan); release_sock(sk); @@ -930,11 +939,9 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) chan->fcs = pchan->fcs; chan->max_tx = pchan->max_tx; chan->tx_win = pchan->tx_win; + chan->tx_win_max = pchan->tx_win_max; chan->sec_level = pchan->sec_level; - chan->role_switch = pchan->role_switch; - chan->force_reliable = pchan->force_reliable; - chan->flushable = pchan->flushable; - chan->force_active = pchan->force_active; + chan->flags = pchan->flags; security_sk_clone(parent, sk); } else { @@ -963,12 +970,10 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) chan->max_tx = L2CAP_DEFAULT_MAX_TX; chan->fcs = L2CAP_FCS_CRC16; chan->tx_win = L2CAP_DEFAULT_TX_WINDOW; + chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; chan->sec_level = BT_SECURITY_LOW; - chan->role_switch = 0; - chan->force_reliable = 0; - chan->flushable = BT_FLUSHABLE_OFF; - chan->force_active = BT_POWER_FORCE_ACTIVE_ON; - + chan->flags = 0; + set_bit(FLAG_FORCE_ACTIVE, &chan->flags); } /* Default config options */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 53e109eb043e..bc8e59dda78e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -22,32 +22,111 @@ /* Bluetooth HCI Management interface */ +#include <linux/kernel.h> #include <linux/uaccess.h> +#include <linux/module.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/mgmt.h> +#include <net/bluetooth/smp.h> #define MGMT_VERSION 0 #define MGMT_REVISION 1 +#define INQUIRY_LEN_BREDR 0x08 /* TGAP(100) */ + +#define SERVICE_CACHE_TIMEOUT (5 * 1000) + struct pending_cmd { struct list_head list; - __u16 opcode; + u16 opcode; int index; void *param; struct sock *sk; void *user_data; }; -static LIST_HEAD(cmd_list); +/* HCI to MGMT error code conversion table */ +static u8 mgmt_status_table[] = { + MGMT_STATUS_SUCCESS, + MGMT_STATUS_UNKNOWN_COMMAND, /* Unknown Command */ + MGMT_STATUS_NOT_CONNECTED, /* No Connection */ + MGMT_STATUS_FAILED, /* Hardware Failure */ + MGMT_STATUS_CONNECT_FAILED, /* Page Timeout */ + MGMT_STATUS_AUTH_FAILED, /* Authentication Failed */ + MGMT_STATUS_NOT_PAIRED, /* PIN or Key Missing */ + MGMT_STATUS_NO_RESOURCES, /* Memory Full */ + MGMT_STATUS_TIMEOUT, /* Connection Timeout */ + MGMT_STATUS_NO_RESOURCES, /* Max Number of Connections */ + MGMT_STATUS_NO_RESOURCES, /* Max Number of SCO Connections */ + MGMT_STATUS_ALREADY_CONNECTED, /* ACL Connection Exists */ + MGMT_STATUS_BUSY, /* Command Disallowed */ + MGMT_STATUS_NO_RESOURCES, /* Rejected Limited Resources */ + MGMT_STATUS_REJECTED, /* Rejected Security */ + MGMT_STATUS_REJECTED, /* Rejected Personal */ + MGMT_STATUS_TIMEOUT, /* Host Timeout */ + MGMT_STATUS_NOT_SUPPORTED, /* Unsupported Feature */ + MGMT_STATUS_INVALID_PARAMS, /* Invalid Parameters */ + MGMT_STATUS_DISCONNECTED, /* OE User Ended Connection */ + MGMT_STATUS_NO_RESOURCES, /* OE Low Resources */ + MGMT_STATUS_DISCONNECTED, /* OE Power Off */ + MGMT_STATUS_DISCONNECTED, /* Connection Terminated */ + MGMT_STATUS_BUSY, /* Repeated Attempts */ + MGMT_STATUS_REJECTED, /* Pairing Not Allowed */ + MGMT_STATUS_FAILED, /* Unknown LMP PDU */ + MGMT_STATUS_NOT_SUPPORTED, /* Unsupported Remote Feature */ + MGMT_STATUS_REJECTED, /* SCO Offset Rejected */ + MGMT_STATUS_REJECTED, /* SCO Interval Rejected */ + MGMT_STATUS_REJECTED, /* Air Mode Rejected */ + MGMT_STATUS_INVALID_PARAMS, /* Invalid LMP Parameters */ + MGMT_STATUS_FAILED, /* Unspecified Error */ + MGMT_STATUS_NOT_SUPPORTED, /* Unsupported LMP Parameter Value */ + MGMT_STATUS_FAILED, /* Role Change Not Allowed */ + MGMT_STATUS_TIMEOUT, /* LMP Response Timeout */ + MGMT_STATUS_FAILED, /* LMP Error Transaction Collision */ + MGMT_STATUS_FAILED, /* LMP PDU Not Allowed */ + MGMT_STATUS_REJECTED, /* Encryption Mode Not Accepted */ + MGMT_STATUS_FAILED, /* Unit Link Key Used */ + MGMT_STATUS_NOT_SUPPORTED, /* QoS Not Supported */ + MGMT_STATUS_TIMEOUT, /* Instant Passed */ + MGMT_STATUS_NOT_SUPPORTED, /* Pairing Not Supported */ + MGMT_STATUS_FAILED, /* Transaction Collision */ + MGMT_STATUS_INVALID_PARAMS, /* Unacceptable Parameter */ + MGMT_STATUS_REJECTED, /* QoS Rejected */ + MGMT_STATUS_NOT_SUPPORTED, /* Classification Not Supported */ + MGMT_STATUS_REJECTED, /* Insufficient Security */ + MGMT_STATUS_INVALID_PARAMS, /* Parameter Out Of Range */ + MGMT_STATUS_BUSY, /* Role Switch Pending */ + MGMT_STATUS_FAILED, /* Slot Violation */ + MGMT_STATUS_FAILED, /* Role Switch Failed */ + MGMT_STATUS_INVALID_PARAMS, /* EIR Too Large */ + MGMT_STATUS_NOT_SUPPORTED, /* Simple Pairing Not Supported */ + MGMT_STATUS_BUSY, /* Host Busy Pairing */ + MGMT_STATUS_REJECTED, /* Rejected, No Suitable Channel */ + MGMT_STATUS_BUSY, /* Controller Busy */ + MGMT_STATUS_INVALID_PARAMS, /* Unsuitable Connection Interval */ + MGMT_STATUS_TIMEOUT, /* Directed Advertising Timeout */ + MGMT_STATUS_AUTH_FAILED, /* Terminated Due to MIC Failure */ + MGMT_STATUS_CONNECT_FAILED, /* Connection Establishment Failed */ + MGMT_STATUS_CONNECT_FAILED, /* MAC Connection Failed */ +}; + +static u8 mgmt_status(u8 hci_status) +{ + if (hci_status < ARRAY_SIZE(mgmt_status_table)) + return mgmt_status_table[hci_status]; + + return MGMT_STATUS_FAILED; +} static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { struct sk_buff *skb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; + int err; BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status); @@ -65,10 +144,11 @@ static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) ev->status = status; put_unaligned_le16(cmd, &ev->opcode); - if (sock_queue_rcv_skb(sk, skb) < 0) + err = sock_queue_rcv_skb(sk, skb); + if (err < 0) kfree_skb(skb); - return 0; + return err; } static int cmd_complete(struct sock *sk, u16 index, u16 cmd, void *rp, @@ -77,6 +157,7 @@ static int cmd_complete(struct sock *sk, u16 index, u16 cmd, void *rp, struct sk_buff *skb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; + int err; BT_DBG("sock %p", sk); @@ -96,10 +177,11 @@ static int cmd_complete(struct sock *sk, u16 index, u16 cmd, void *rp, if (rp) memcpy(ev->data, rp, rp_len); - if (sock_queue_rcv_skb(sk, skb) < 0) + err = sock_queue_rcv_skb(sk, skb); + if (err < 0) kfree_skb(skb); - return 0; + return err;; } static int read_version(struct sock *sk) @@ -119,6 +201,7 @@ static int read_index_list(struct sock *sk) { struct mgmt_rp_read_index_list *rp; struct list_head *p; + struct hci_dev *d; size_t rp_len; u16 count; int i, err; @@ -142,12 +225,9 @@ static int read_index_list(struct sock *sk) put_unaligned_le16(count, &rp->num_controllers); i = 0; - list_for_each(p, &hci_dev_list) { - struct hci_dev *d = list_entry(p, struct hci_dev, list); - - hci_del_off_timer(d); - - set_bit(HCI_MGMT, &d->flags); + list_for_each_entry(d, &hci_dev_list, list) { + if (test_and_clear_bit(HCI_AUTO_OFF, &d->flags)) + cancel_delayed_work(&d->power_off); if (test_bit(HCI_SETUP, &d->flags)) continue; @@ -166,6 +246,262 @@ static int read_index_list(struct sock *sk) return err; } +static u32 get_supported_settings(struct hci_dev *hdev) +{ + u32 settings = 0; + + settings |= MGMT_SETTING_POWERED; + settings |= MGMT_SETTING_CONNECTABLE; + settings |= MGMT_SETTING_FAST_CONNECTABLE; + settings |= MGMT_SETTING_DISCOVERABLE; + settings |= MGMT_SETTING_PAIRABLE; + + if (hdev->features[6] & LMP_SIMPLE_PAIR) + settings |= MGMT_SETTING_SSP; + + if (!(hdev->features[4] & LMP_NO_BREDR)) { + settings |= MGMT_SETTING_BREDR; + settings |= MGMT_SETTING_LINK_SECURITY; + } + + if (hdev->features[4] & LMP_LE) + settings |= MGMT_SETTING_LE; + + return settings; +} + +static u32 get_current_settings(struct hci_dev *hdev) +{ + u32 settings = 0; + + if (test_bit(HCI_UP, &hdev->flags)) + settings |= MGMT_SETTING_POWERED; + else + return settings; + + if (test_bit(HCI_PSCAN, &hdev->flags)) + settings |= MGMT_SETTING_CONNECTABLE; + + if (test_bit(HCI_ISCAN, &hdev->flags)) + settings |= MGMT_SETTING_DISCOVERABLE; + + if (test_bit(HCI_PAIRABLE, &hdev->flags)) + settings |= MGMT_SETTING_PAIRABLE; + + if (!(hdev->features[4] & LMP_NO_BREDR)) + settings |= MGMT_SETTING_BREDR; + + if (hdev->host_features[0] & LMP_HOST_LE) + settings |= MGMT_SETTING_LE; + + if (test_bit(HCI_AUTH, &hdev->flags)) + settings |= MGMT_SETTING_LINK_SECURITY; + + if (hdev->ssp_mode > 0) + settings |= MGMT_SETTING_SSP; + + return settings; +} + +#define EIR_FLAGS 0x01 /* flags */ +#define EIR_UUID16_SOME 0x02 /* 16-bit UUID, more available */ +#define EIR_UUID16_ALL 0x03 /* 16-bit UUID, all listed */ +#define EIR_UUID32_SOME 0x04 /* 32-bit UUID, more available */ +#define EIR_UUID32_ALL 0x05 /* 32-bit UUID, all listed */ +#define EIR_UUID128_SOME 0x06 /* 128-bit UUID, more available */ +#define EIR_UUID128_ALL 0x07 /* 128-bit UUID, all listed */ +#define EIR_NAME_SHORT 0x08 /* shortened local name */ +#define EIR_NAME_COMPLETE 0x09 /* complete local name */ +#define EIR_TX_POWER 0x0A /* transmit power level */ +#define EIR_DEVICE_ID 0x10 /* device ID */ + +#define PNP_INFO_SVCLASS_ID 0x1200 + +static u8 bluetooth_base_uuid[] = { + 0xFB, 0x34, 0x9B, 0x5F, 0x80, 0x00, 0x00, 0x80, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static u16 get_uuid16(u8 *uuid128) +{ + u32 val; + int i; + + for (i = 0; i < 12; i++) { + if (bluetooth_base_uuid[i] != uuid128[i]) + return 0; + } + + memcpy(&val, &uuid128[12], 4); + + val = le32_to_cpu(val); + if (val > 0xffff) + return 0; + + return (u16) val; +} + +static void create_eir(struct hci_dev *hdev, u8 *data) +{ + u8 *ptr = data; + u16 eir_len = 0; + u16 uuid16_list[HCI_MAX_EIR_LENGTH / sizeof(u16)]; + int i, truncated = 0; + struct bt_uuid *uuid; + size_t name_len; + + name_len = strlen(hdev->dev_name); + + if (name_len > 0) { + /* EIR Data type */ + if (name_len > 48) { + name_len = 48; + ptr[1] = EIR_NAME_SHORT; + } else + ptr[1] = EIR_NAME_COMPLETE; + + /* EIR Data length */ + ptr[0] = name_len + 1; + + memcpy(ptr + 2, hdev->dev_name, name_len); + + eir_len += (name_len + 2); + ptr += (name_len + 2); + } + + memset(uuid16_list, 0, sizeof(uuid16_list)); + + /* Group all UUID16 types */ + list_for_each_entry(uuid, &hdev->uuids, list) { + u16 uuid16; + + uuid16 = get_uuid16(uuid->uuid); + if (uuid16 == 0) + return; + + if (uuid16 < 0x1100) + continue; + + if (uuid16 == PNP_INFO_SVCLASS_ID) + continue; + + /* Stop if not enough space to put next UUID */ + if (eir_len + 2 + sizeof(u16) > HCI_MAX_EIR_LENGTH) { + truncated = 1; + break; + } + + /* Check for duplicates */ + for (i = 0; uuid16_list[i] != 0; i++) + if (uuid16_list[i] == uuid16) + break; + + if (uuid16_list[i] == 0) { + uuid16_list[i] = uuid16; + eir_len += sizeof(u16); + } + } + + if (uuid16_list[0] != 0) { + u8 *length = ptr; + + /* EIR Data type */ + ptr[1] = truncated ? EIR_UUID16_SOME : EIR_UUID16_ALL; + + ptr += 2; + eir_len += 2; + + for (i = 0; uuid16_list[i] != 0; i++) { + *ptr++ = (uuid16_list[i] & 0x00ff); + *ptr++ = (uuid16_list[i] & 0xff00) >> 8; + } + + /* EIR Data length */ + *length = (i * sizeof(u16)) + 1; + } +} + +static int update_eir(struct hci_dev *hdev) +{ + struct hci_cp_write_eir cp; + + if (!(hdev->features[6] & LMP_EXT_INQ)) + return 0; + + if (hdev->ssp_mode == 0) + return 0; + + if (test_bit(HCI_SERVICE_CACHE, &hdev->flags)) + return 0; + + memset(&cp, 0, sizeof(cp)); + + create_eir(hdev, cp.data); + + if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) + return 0; + + memcpy(hdev->eir, cp.data, sizeof(cp.data)); + + return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); +} + +static u8 get_service_classes(struct hci_dev *hdev) +{ + struct bt_uuid *uuid; + u8 val = 0; + + list_for_each_entry(uuid, &hdev->uuids, list) + val |= uuid->svc_hint; + + return val; +} + +static int update_class(struct hci_dev *hdev) +{ + u8 cod[3]; + + BT_DBG("%s", hdev->name); + + if (test_bit(HCI_SERVICE_CACHE, &hdev->flags)) + return 0; + + cod[0] = hdev->minor_class; + cod[1] = hdev->major_class; + cod[2] = get_service_classes(hdev); + + if (memcmp(cod, hdev->dev_class, 3) == 0) + return 0; + + return hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); +} + +static void service_cache_off(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + service_cache.work); + + if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->flags)) + return; + + hci_dev_lock(hdev); + + update_eir(hdev); + update_class(hdev); + + hci_dev_unlock(hdev); +} + +static void mgmt_init_hdev(struct hci_dev *hdev) +{ + if (!test_and_set_bit(HCI_MGMT, &hdev->flags)) + INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off); + + if (!test_and_set_bit(HCI_SERVICE_CACHE, &hdev->flags)) + schedule_delayed_work(&hdev->service_cache, + msecs_to_jiffies(SERVICE_CACHE_TIMEOUT)); +} + static int read_controller_info(struct sock *sk, u16 index) { struct mgmt_rp_read_info rp; @@ -175,40 +511,33 @@ static int read_controller_info(struct sock *sk, u16 index) hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_READ_INFO, ENODEV); + return cmd_status(sk, index, MGMT_OP_READ_INFO, + MGMT_STATUS_INVALID_PARAMS); - hci_del_off_timer(hdev); + if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->flags)) + cancel_delayed_work_sync(&hdev->power_off); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); - set_bit(HCI_MGMT, &hdev->flags); + if (test_and_clear_bit(HCI_PI_MGMT_INIT, &hci_pi(sk)->flags)) + mgmt_init_hdev(hdev); memset(&rp, 0, sizeof(rp)); - rp.type = hdev->dev_type; + bacpy(&rp.bdaddr, &hdev->bdaddr); + + rp.version = hdev->hci_ver; - rp.powered = test_bit(HCI_UP, &hdev->flags); - rp.connectable = test_bit(HCI_PSCAN, &hdev->flags); - rp.discoverable = test_bit(HCI_ISCAN, &hdev->flags); - rp.pairable = test_bit(HCI_PSCAN, &hdev->flags); + put_unaligned_le16(hdev->manufacturer, &rp.manufacturer); - if (test_bit(HCI_AUTH, &hdev->flags)) - rp.sec_mode = 3; - else if (hdev->ssp_mode > 0) - rp.sec_mode = 4; - else - rp.sec_mode = 2; + rp.supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp.current_settings = cpu_to_le32(get_current_settings(hdev)); - bacpy(&rp.bdaddr, &hdev->bdaddr); - memcpy(rp.features, hdev->features, 8); memcpy(rp.dev_class, hdev->dev_class, 3); - put_unaligned_le16(hdev->manufacturer, &rp.manufacturer); - rp.hci_ver = hdev->hci_ver; - put_unaligned_le16(hdev->hci_rev, &rp.hci_rev); memcpy(rp.name, hdev->dev_name, sizeof(hdev->dev_name)); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return cmd_complete(sk, index, MGMT_OP_READ_INFO, &rp, sizeof(rp)); @@ -222,7 +551,8 @@ static void mgmt_pending_free(struct pending_cmd *cmd) } static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, - u16 index, void *data, u16 len) + struct hci_dev *hdev, + void *data, u16 len) { struct pending_cmd *cmd; @@ -231,7 +561,7 @@ static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, return NULL; cmd->opcode = opcode; - cmd->index = index; + cmd->index = hdev->id; cmd->param = kmalloc(len, GFP_ATOMIC); if (!cmd->param) { @@ -245,48 +575,36 @@ static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, cmd->sk = sk; sock_hold(sk); - list_add(&cmd->list, &cmd_list); + list_add(&cmd->list, &hdev->mgmt_pending); return cmd; } -static void mgmt_pending_foreach(u16 opcode, int index, +static void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, void (*cb)(struct pending_cmd *cmd, void *data), void *data) { struct list_head *p, *n; - list_for_each_safe(p, n, &cmd_list) { + list_for_each_safe(p, n, &hdev->mgmt_pending) { struct pending_cmd *cmd; cmd = list_entry(p, struct pending_cmd, list); - if (cmd->opcode != opcode) - continue; - - if (index >= 0 && cmd->index != index) + if (opcode > 0 && cmd->opcode != opcode) continue; cb(cmd, data); } } -static struct pending_cmd *mgmt_pending_find(u16 opcode, int index) +static struct pending_cmd *mgmt_pending_find(u16 opcode, struct hci_dev *hdev) { - struct list_head *p; - - list_for_each(p, &cmd_list) { - struct pending_cmd *cmd; - - cmd = list_entry(p, struct pending_cmd, list); - - if (cmd->opcode != opcode) - continue; - - if (index >= 0 && cmd->index != index) - continue; + struct pending_cmd *cmd; - return cmd; + list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + if (cmd->opcode == opcode) + return cmd; } return NULL; @@ -298,6 +616,13 @@ static void mgmt_pending_remove(struct pending_cmd *cmd) mgmt_pending_free(cmd); } +static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) +{ + __le32 settings = cpu_to_le32(get_current_settings(hdev)); + + return cmd_complete(sk, hdev->id, opcode, &settings, sizeof(settings)); +} + static int set_powered(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct mgmt_mode *cp; @@ -310,40 +635,43 @@ static int set_powered(struct sock *sk, u16 index, unsigned char *data, u16 len) BT_DBG("request for hci%u", index); if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_SET_POWERED, EINVAL); + return cmd_status(sk, index, MGMT_OP_SET_POWERED, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_SET_POWERED, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_POWERED, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); up = test_bit(HCI_UP, &hdev->flags); if ((cp->val && up) || (!cp->val && !up)) { - err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EALREADY); + err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_POWERED, index)) { - err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EBUSY); + if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) { + err = cmd_status(sk, index, MGMT_OP_SET_POWERED, + MGMT_STATUS_BUSY); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, index, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; } if (cp->val) - queue_work(hdev->workqueue, &hdev->power_on); + schedule_work(&hdev->power_on); else - queue_work(hdev->workqueue, &hdev->power_off); + schedule_work(&hdev->power_off.work); err = 0; failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } @@ -351,7 +679,7 @@ failed: static int set_discoverable(struct sock *sk, u16 index, unsigned char *data, u16 len) { - struct mgmt_mode *cp; + struct mgmt_cp_set_discoverable *cp; struct hci_dev *hdev; struct pending_cmd *cmd; u8 scan; @@ -362,32 +690,36 @@ static int set_discoverable(struct sock *sk, u16 index, unsigned char *data, BT_DBG("request for hci%u", index); if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EINVAL); + return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_NOT_POWERED); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) { - err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EBUSY); + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, + MGMT_STATUS_BUSY); goto failed; } if (cp->val == test_bit(HCI_ISCAN, &hdev->flags) && test_bit(HCI_PSCAN, &hdev->flags)) { - err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EALREADY); + err = send_settings_rsp(sk, MGMT_OP_SET_DISCOVERABLE, hdev); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, index, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -397,13 +729,18 @@ static int set_discoverable(struct sock *sk, u16 index, unsigned char *data, if (cp->val) scan |= SCAN_INQUIRY; + else + cancel_delayed_work(&hdev->discov_off); err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); if (err < 0) mgmt_pending_remove(cmd); + if (cp->val) + hdev->discov_timeout = get_unaligned_le16(&cp->timeout); + failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -423,31 +760,35 @@ static int set_connectable(struct sock *sk, u16 index, unsigned char *data, BT_DBG("request for hci%u", index); if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EINVAL); + return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_NOT_POWERED); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) { - err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EBUSY); + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, hdev) || + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) { + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, + MGMT_STATUS_BUSY); goto failed; } if (cp->val == test_bit(HCI_PSCAN, &hdev->flags)) { - err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EALREADY); + err = send_settings_rsp(sk, MGMT_OP_SET_CONNECTABLE, hdev); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, index, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -463,14 +804,14 @@ static int set_connectable(struct sock *sk, u16 index, unsigned char *data, mgmt_pending_remove(cmd); failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } -static int mgmt_event(u16 event, u16 index, void *data, u16 data_len, - struct sock *skip_sk) +static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, + u16 data_len, struct sock *skip_sk) { struct sk_buff *skb; struct mgmt_hdr *hdr; @@ -483,7 +824,10 @@ static int mgmt_event(u16 event, u16 index, void *data, u16 data_len, hdr = (void *) skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(event); - hdr->index = cpu_to_le16(index); + if (hdev) + hdr->index = cpu_to_le16(hdev->id); + else + hdr->index = cpu_to_le16(MGMT_INDEX_NONE); hdr->len = cpu_to_le16(data_len); if (data) @@ -495,20 +839,12 @@ static int mgmt_event(u16 event, u16 index, void *data, u16 data_len, return 0; } -static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val) -{ - struct mgmt_mode rp; - - rp.val = val; - - return cmd_complete(sk, index, opcode, &rp, sizeof(rp)); -} - static int set_pairable(struct sock *sk, u16 index, unsigned char *data, u16 len) { - struct mgmt_mode *cp, ev; + struct mgmt_mode *cp; struct hci_dev *hdev; + __le32 ev; int err; cp = (void *) data; @@ -516,211 +852,36 @@ static int set_pairable(struct sock *sk, u16 index, unsigned char *data, BT_DBG("request for hci%u", index); if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, EINVAL); + return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (cp->val) set_bit(HCI_PAIRABLE, &hdev->flags); else clear_bit(HCI_PAIRABLE, &hdev->flags); - err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, index, cp->val); + err = send_settings_rsp(sk, MGMT_OP_SET_PAIRABLE, hdev); if (err < 0) goto failed; - ev.val = cp->val; + ev = cpu_to_le32(get_current_settings(hdev)); - err = mgmt_event(MGMT_EV_PAIRABLE, index, &ev, sizeof(ev), sk); + err = mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), sk); failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } -#define EIR_FLAGS 0x01 /* flags */ -#define EIR_UUID16_SOME 0x02 /* 16-bit UUID, more available */ -#define EIR_UUID16_ALL 0x03 /* 16-bit UUID, all listed */ -#define EIR_UUID32_SOME 0x04 /* 32-bit UUID, more available */ -#define EIR_UUID32_ALL 0x05 /* 32-bit UUID, all listed */ -#define EIR_UUID128_SOME 0x06 /* 128-bit UUID, more available */ -#define EIR_UUID128_ALL 0x07 /* 128-bit UUID, all listed */ -#define EIR_NAME_SHORT 0x08 /* shortened local name */ -#define EIR_NAME_COMPLETE 0x09 /* complete local name */ -#define EIR_TX_POWER 0x0A /* transmit power level */ -#define EIR_DEVICE_ID 0x10 /* device ID */ - -#define PNP_INFO_SVCLASS_ID 0x1200 - -static u8 bluetooth_base_uuid[] = { - 0xFB, 0x34, 0x9B, 0x5F, 0x80, 0x00, 0x00, 0x80, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; - -static u16 get_uuid16(u8 *uuid128) -{ - u32 val; - int i; - - for (i = 0; i < 12; i++) { - if (bluetooth_base_uuid[i] != uuid128[i]) - return 0; - } - - memcpy(&val, &uuid128[12], 4); - - val = le32_to_cpu(val); - if (val > 0xffff) - return 0; - - return (u16) val; -} - -static void create_eir(struct hci_dev *hdev, u8 *data) -{ - u8 *ptr = data; - u16 eir_len = 0; - u16 uuid16_list[HCI_MAX_EIR_LENGTH / sizeof(u16)]; - int i, truncated = 0; - struct list_head *p; - size_t name_len; - - name_len = strlen(hdev->dev_name); - - if (name_len > 0) { - /* EIR Data type */ - if (name_len > 48) { - name_len = 48; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = EIR_NAME_COMPLETE; - - /* EIR Data length */ - ptr[0] = name_len + 1; - - memcpy(ptr + 2, hdev->dev_name, name_len); - - eir_len += (name_len + 2); - ptr += (name_len + 2); - } - - memset(uuid16_list, 0, sizeof(uuid16_list)); - - /* Group all UUID16 types */ - list_for_each(p, &hdev->uuids) { - struct bt_uuid *uuid = list_entry(p, struct bt_uuid, list); - u16 uuid16; - - uuid16 = get_uuid16(uuid->uuid); - if (uuid16 == 0) - return; - - if (uuid16 < 0x1100) - continue; - - if (uuid16 == PNP_INFO_SVCLASS_ID) - continue; - - /* Stop if not enough space to put next UUID */ - if (eir_len + 2 + sizeof(u16) > HCI_MAX_EIR_LENGTH) { - truncated = 1; - break; - } - - /* Check for duplicates */ - for (i = 0; uuid16_list[i] != 0; i++) - if (uuid16_list[i] == uuid16) - break; - - if (uuid16_list[i] == 0) { - uuid16_list[i] = uuid16; - eir_len += sizeof(u16); - } - } - - if (uuid16_list[0] != 0) { - u8 *length = ptr; - - /* EIR Data type */ - ptr[1] = truncated ? EIR_UUID16_SOME : EIR_UUID16_ALL; - - ptr += 2; - eir_len += 2; - - for (i = 0; uuid16_list[i] != 0; i++) { - *ptr++ = (uuid16_list[i] & 0x00ff); - *ptr++ = (uuid16_list[i] & 0xff00) >> 8; - } - - /* EIR Data length */ - *length = (i * sizeof(u16)) + 1; - } -} - -static int update_eir(struct hci_dev *hdev) -{ - struct hci_cp_write_eir cp; - - if (!(hdev->features[6] & LMP_EXT_INQ)) - return 0; - - if (hdev->ssp_mode == 0) - return 0; - - if (test_bit(HCI_SERVICE_CACHE, &hdev->flags)) - return 0; - - memset(&cp, 0, sizeof(cp)); - - create_eir(hdev, cp.data); - - if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) - return 0; - - memcpy(hdev->eir, cp.data, sizeof(cp.data)); - - return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); -} - -static u8 get_service_classes(struct hci_dev *hdev) -{ - struct list_head *p; - u8 val = 0; - - list_for_each(p, &hdev->uuids) { - struct bt_uuid *uuid = list_entry(p, struct bt_uuid, list); - - val |= uuid->svc_hint; - } - - return val; -} - -static int update_class(struct hci_dev *hdev) -{ - u8 cod[3]; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_SERVICE_CACHE, &hdev->flags)) - return 0; - - cod[0] = hdev->minor_class; - cod[1] = hdev->major_class; - cod[2] = get_service_classes(hdev); - - if (memcmp(cod, hdev->dev_class, 3) == 0) - return 0; - - return hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); -} - static int add_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct mgmt_cp_add_uuid *cp; @@ -733,13 +894,15 @@ static int add_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) BT_DBG("request for hci%u", index); if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_ADD_UUID, EINVAL); + return cmd_status(sk, index, MGMT_OP_ADD_UUID, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_ADD_UUID, ENODEV); + return cmd_status(sk, index, MGMT_OP_ADD_UUID, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); uuid = kmalloc(sizeof(*uuid), GFP_ATOMIC); if (!uuid) { @@ -763,7 +926,7 @@ static int add_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) err = cmd_complete(sk, index, MGMT_OP_ADD_UUID, NULL, 0); failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -782,13 +945,15 @@ static int remove_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) BT_DBG("request for hci%u", index); if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, EINVAL); + return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENODEV); + return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (memcmp(cp->uuid, bt_uuid_any, 16) == 0) { err = hci_uuids_clear(hdev); @@ -808,7 +973,8 @@ static int remove_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) } if (found == 0) { - err = cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENOENT); + err = cmd_status(sk, index, MGMT_OP_REMOVE_UUID, + MGMT_STATUS_INVALID_PARAMS); goto unlock; } @@ -823,7 +989,7 @@ static int remove_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) err = cmd_complete(sk, index, MGMT_OP_REMOVE_UUID, NULL, 0); unlock: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -841,97 +1007,71 @@ static int set_dev_class(struct sock *sk, u16 index, unsigned char *data, BT_DBG("request for hci%u", index); if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, EINVAL); + return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); hdev->major_class = cp->major; hdev->minor_class = cp->minor; + if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->flags)) { + hci_dev_unlock(hdev); + cancel_delayed_work_sync(&hdev->service_cache); + hci_dev_lock(hdev); + update_eir(hdev); + } + err = update_class(hdev); if (err == 0) err = cmd_complete(sk, index, MGMT_OP_SET_DEV_CLASS, NULL, 0); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } -static int set_service_cache(struct sock *sk, u16 index, unsigned char *data, - u16 len) -{ - struct hci_dev *hdev; - struct mgmt_cp_set_service_cache *cp; - int err; - - cp = (void *) data; - - if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, EINVAL); - - hdev = hci_dev_get(index); - if (!hdev) - return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, ENODEV); - - hci_dev_lock_bh(hdev); - - BT_DBG("hci%u enable %d", index, cp->enable); - - if (cp->enable) { - set_bit(HCI_SERVICE_CACHE, &hdev->flags); - err = 0; - } else { - clear_bit(HCI_SERVICE_CACHE, &hdev->flags); - err = update_class(hdev); - if (err == 0) - err = update_eir(hdev); - } - - if (err == 0) - err = cmd_complete(sk, index, MGMT_OP_SET_SERVICE_CACHE, NULL, - 0); - - hci_dev_unlock_bh(hdev); - hci_dev_put(hdev); - - return err; -} - -static int load_keys(struct sock *sk, u16 index, unsigned char *data, u16 len) +static int load_link_keys(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; - struct mgmt_cp_load_keys *cp; + struct mgmt_cp_load_link_keys *cp; u16 key_count, expected_len; - int i, err; + int i; cp = (void *) data; if (len < sizeof(*cp)) - return -EINVAL; + return cmd_status(sk, index, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); key_count = get_unaligned_le16(&cp->key_count); - expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_key_info); - if (expected_len > len) { - BT_ERR("load_keys: expected at least %u bytes, got %u bytes", - expected_len, len); - return -EINVAL; + expected_len = sizeof(*cp) + key_count * + sizeof(struct mgmt_link_key_info); + if (expected_len != len) { + BT_ERR("load_link_keys: expected %u bytes, got %u bytes", + len, expected_len); + return cmd_status(sk, index, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); } hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_LOAD_KEYS, ENODEV); + return cmd_status(sk, index, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_INVALID_PARAMS); BT_DBG("hci%u debug_keys %u key_count %u", index, cp->debug_keys, key_count); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); hci_link_keys_clear(hdev); @@ -942,78 +1082,85 @@ static int load_keys(struct sock *sk, u16 index, unsigned char *data, u16 len) else clear_bit(HCI_DEBUG_KEYS, &hdev->flags); - len -= sizeof(*cp); - i = 0; - - while (i < len) { - struct mgmt_key_info *key = (void *) cp->keys + i; - - i += sizeof(*key) + key->dlen; - - if (key->type == HCI_LK_SMP_LTK) { - struct key_master_id *id = (void *) key->data; - - if (key->dlen != sizeof(struct key_master_id)) - continue; - - hci_add_ltk(hdev, 0, &key->bdaddr, key->pin_len, - id->ediv, id->rand, key->val); - - continue; - } + for (i = 0; i < key_count; i++) { + struct mgmt_link_key_info *key = &cp->keys[i]; hci_add_link_key(hdev, NULL, 0, &key->bdaddr, key->val, key->type, key->pin_len); } - err = cmd_complete(sk, index, MGMT_OP_LOAD_KEYS, NULL, 0); + cmd_complete(sk, index, MGMT_OP_LOAD_LINK_KEYS, NULL, 0); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); - return err; + return 0; } -static int remove_key(struct sock *sk, u16 index, unsigned char *data, u16 len) +static int remove_keys(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; - struct mgmt_cp_remove_key *cp; + struct mgmt_cp_remove_keys *cp; + struct mgmt_rp_remove_keys rp; + struct hci_cp_disconnect dc; + struct pending_cmd *cmd; struct hci_conn *conn; int err; cp = (void *) data; if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, EINVAL); + return cmd_status(sk, index, MGMT_OP_REMOVE_KEYS, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, ENODEV); + return cmd_status(sk, index, MGMT_OP_REMOVE_KEYS, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); - hci_dev_lock_bh(hdev); + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.bdaddr, &cp->bdaddr); + rp.status = MGMT_STATUS_FAILED; err = hci_remove_link_key(hdev, &cp->bdaddr); if (err < 0) { - err = cmd_status(sk, index, MGMT_OP_REMOVE_KEY, -err); + rp.status = MGMT_STATUS_NOT_PAIRED; goto unlock; } - err = 0; - - if (!test_bit(HCI_UP, &hdev->flags) || !cp->disconnect) + if (!test_bit(HCI_UP, &hdev->flags) || !cp->disconnect) { + err = cmd_complete(sk, index, MGMT_OP_REMOVE_KEYS, &rp, + sizeof(rp)); goto unlock; + } conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); - if (conn) { - struct hci_cp_disconnect dc; + if (!conn) { + err = cmd_complete(sk, index, MGMT_OP_REMOVE_KEYS, &rp, + sizeof(rp)); + goto unlock; + } - put_unaligned_le16(conn->handle, &dc.handle); - dc.reason = 0x13; /* Remote User Terminated Connection */ - err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc); + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_KEYS, hdev, cp, sizeof(*cp)); + if (!cmd) { + err = -ENOMEM; + goto unlock; } + put_unaligned_le16(conn->handle, &dc.handle); + dc.reason = 0x13; /* Remote User Terminated Connection */ + err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc); + if (err < 0) + mgmt_pending_remove(cmd); + unlock: - hci_dev_unlock_bh(hdev); + if (err < 0) + err = cmd_complete(sk, index, MGMT_OP_REMOVE_KEYS, &rp, + sizeof(rp)); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1033,21 +1180,25 @@ static int disconnect(struct sock *sk, u16 index, unsigned char *data, u16 len) cp = (void *) data; if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_DISCONNECT, EINVAL); + return cmd_status(sk, index, MGMT_OP_DISCONNECT, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_DISCONNECT, ENODEV); + return cmd_status(sk, index, MGMT_OP_DISCONNECT, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_POWERED); goto failed; } - if (mgmt_pending_find(MGMT_OP_DISCONNECT, index)) { - err = cmd_status(sk, index, MGMT_OP_DISCONNECT, EBUSY); + if (mgmt_pending_find(MGMT_OP_DISCONNECT, hdev)) { + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, + MGMT_STATUS_BUSY); goto failed; } @@ -1056,11 +1207,12 @@ static int disconnect(struct sock *sk, u16 index, unsigned char *data, u16 len) conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->bdaddr); if (!conn) { - err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENOTCONN); + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, + MGMT_STATUS_NOT_CONNECTED); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, index, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -1074,16 +1226,36 @@ static int disconnect(struct sock *sk, u16 index, unsigned char *data, u16 len) mgmt_pending_remove(cmd); failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } +static u8 link_to_mgmt(u8 link_type, u8 addr_type) +{ + switch (link_type) { + case LE_LINK: + switch (addr_type) { + case ADDR_LE_DEV_PUBLIC: + return MGMT_ADDR_LE_PUBLIC; + case ADDR_LE_DEV_RANDOM: + return MGMT_ADDR_LE_RANDOM; + default: + return MGMT_ADDR_INVALID; + } + case ACL_LINK: + return MGMT_ADDR_BREDR; + default: + return MGMT_ADDR_INVALID; + } +} + static int get_connections(struct sock *sk, u16 index) { struct mgmt_rp_get_connections *rp; struct hci_dev *hdev; + struct hci_conn *c; struct list_head *p; size_t rp_len; u16 count; @@ -1093,16 +1265,17 @@ static int get_connections(struct sock *sk, u16 index) hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_GET_CONNECTIONS, ENODEV); + return cmd_status(sk, index, MGMT_OP_GET_CONNECTIONS, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); count = 0; list_for_each(p, &hdev->conn_hash.list) { count++; } - rp_len = sizeof(*rp) + (count * sizeof(bdaddr_t)); + rp_len = sizeof(*rp) + (count * sizeof(struct mgmt_addr_info)); rp = kmalloc(rp_len, GFP_ATOMIC); if (!rp) { err = -ENOMEM; @@ -1112,17 +1285,22 @@ static int get_connections(struct sock *sk, u16 index) put_unaligned_le16(count, &rp->conn_count); i = 0; - list_for_each(p, &hdev->conn_hash.list) { - struct hci_conn *c = list_entry(p, struct hci_conn, list); - - bacpy(&rp->conn[i++], &c->dst); + list_for_each_entry(c, &hdev->conn_hash.list, list) { + bacpy(&rp->addr[i].bdaddr, &c->dst); + rp->addr[i].type = link_to_mgmt(c->type, c->dst_type); + if (rp->addr[i].type == MGMT_ADDR_INVALID) + continue; + i++; } + /* Recalculate length in case of filtered SCO connections, etc */ + rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info)); + err = cmd_complete(sk, index, MGMT_OP_GET_CONNECTIONS, rp, rp_len); unlock: kfree(rp); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } @@ -1133,7 +1311,7 @@ static int send_pin_code_neg_reply(struct sock *sk, u16 index, struct pending_cmd *cmd; int err; - cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, index, cp, + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, hdev, cp, sizeof(*cp)); if (!cmd) return -ENOMEM; @@ -1162,22 +1340,26 @@ static int pin_code_reply(struct sock *sk, u16 index, unsigned char *data, cp = (void *) data; if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, EINVAL); + return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENODEV); + return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_NOT_POWERED); goto failed; } conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); if (!conn) { - err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENOTCONN); + err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, + MGMT_STATUS_NOT_CONNECTED); goto failed; } @@ -1189,12 +1371,12 @@ static int pin_code_reply(struct sock *sk, u16 index, unsigned char *data, err = send_pin_code_neg_reply(sk, index, hdev, &ncp); if (err >= 0) err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, index, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -1209,7 +1391,7 @@ static int pin_code_reply(struct sock *sk, u16 index, unsigned char *data, mgmt_pending_remove(cmd); failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1228,25 +1410,25 @@ static int pin_code_neg_reply(struct sock *sk, u16 index, unsigned char *data, if (len != sizeof(*cp)) return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, - ENODEV); + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { err = cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, - ENETDOWN); + MGMT_STATUS_NOT_POWERED); goto failed; } err = send_pin_code_neg_reply(sk, index, hdev, cp); failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1263,20 +1445,22 @@ static int set_io_capability(struct sock *sk, u16 index, unsigned char *data, cp = (void *) data; if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, EINVAL); + return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); hdev->io_capability = cp->io_capability; BT_DBG("%s IO capability set to 0x%02x", hdev->name, hdev->io_capability); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return cmd_complete(sk, index, MGMT_OP_SET_IO_CAPABILITY, NULL, 0); @@ -1285,19 +1469,12 @@ static int set_io_capability(struct sock *sk, u16 index, unsigned char *data, static inline struct pending_cmd *find_pairing(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - struct list_head *p; - - list_for_each(p, &cmd_list) { - struct pending_cmd *cmd; - - cmd = list_entry(p, struct pending_cmd, list); + struct pending_cmd *cmd; + list_for_each_entry(cmd, &hdev->mgmt_pending, list) { if (cmd->opcode != MGMT_OP_PAIR_DEVICE) continue; - if (cmd->index != hdev->id) - continue; - if (cmd->user_data != conn) continue; @@ -1312,7 +1489,8 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status) struct mgmt_rp_pair_device rp; struct hci_conn *conn = cmd->user_data; - bacpy(&rp.bdaddr, &conn->dst); + bacpy(&rp.addr.bdaddr, &conn->dst); + rp.addr.type = link_to_mgmt(conn->type, conn->dst_type); rp.status = status; cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, &rp, sizeof(rp)); @@ -1334,18 +1512,17 @@ static void pairing_complete_cb(struct hci_conn *conn, u8 status) BT_DBG("status %u", status); cmd = find_pairing(conn); - if (!cmd) { + if (!cmd) BT_DBG("Unable to find a pending command"); - return; - } - - pairing_complete(cmd, status); + else + pairing_complete(cmd, status); } static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; struct mgmt_cp_pair_device *cp; + struct mgmt_rp_pair_device rp; struct pending_cmd *cmd; u8 sec_level, auth_type; struct hci_conn *conn; @@ -1356,42 +1533,59 @@ static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len) cp = (void *) data; if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EINVAL); + return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, ENODEV); + return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); - if (cp->io_cap == 0x03) { - sec_level = BT_SECURITY_MEDIUM; + sec_level = BT_SECURITY_MEDIUM; + if (cp->io_cap == 0x03) auth_type = HCI_AT_DEDICATED_BONDING; - } else { - sec_level = BT_SECURITY_HIGH; + else auth_type = HCI_AT_DEDICATED_BONDING_MITM; - } - conn = hci_connect(hdev, ACL_LINK, &cp->bdaddr, sec_level, auth_type); + if (cp->addr.type == MGMT_ADDR_BREDR) + conn = hci_connect(hdev, ACL_LINK, &cp->addr.bdaddr, sec_level, + auth_type); + else + conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr, sec_level, + auth_type); + + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + if (IS_ERR(conn)) { - err = PTR_ERR(conn); + rp.status = -PTR_ERR(conn); + err = cmd_complete(sk, index, MGMT_OP_PAIR_DEVICE, + &rp, sizeof(rp)); goto unlock; } if (conn->connect_cfm_cb) { hci_conn_put(conn); - err = cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EBUSY); + rp.status = EBUSY; + err = cmd_complete(sk, index, MGMT_OP_PAIR_DEVICE, + &rp, sizeof(rp)); goto unlock; } - cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, index, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, hdev, data, len); if (!cmd) { err = -ENOMEM; hci_conn_put(conn); goto unlock; } - conn->connect_cfm_cb = pairing_complete_cb; + /* For LE, just connecting isn't a proof that the pairing finished */ + if (cp->addr.type == MGMT_ADDR_BREDR) + conn->connect_cfm_cb = pairing_complete_cb; + conn->security_cfm_cb = pairing_complete_cb; conn->disconn_cfm_cb = pairing_complete_cb; conn->io_capability = cp->io_cap; @@ -1404,62 +1598,151 @@ static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len) err = 0; unlock: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } -static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data, - u16 len, int success) +static int user_pairing_resp(struct sock *sk, u16 index, bdaddr_t *bdaddr, + u16 mgmt_op, u16 hci_op, __le32 passkey) { - struct mgmt_cp_user_confirm_reply *cp = (void *) data; - u16 mgmt_op, hci_op; struct pending_cmd *cmd; struct hci_dev *hdev; + struct hci_conn *conn; int err; - BT_DBG(""); + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, mgmt_op, + MGMT_STATUS_INVALID_PARAMS); - if (success) { - mgmt_op = MGMT_OP_USER_CONFIRM_REPLY; - hci_op = HCI_OP_USER_CONFIRM_REPLY; - } else { - mgmt_op = MGMT_OP_USER_CONFIRM_NEG_REPLY; - hci_op = HCI_OP_USER_CONFIRM_NEG_REPLY; + hci_dev_lock(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, index, mgmt_op, MGMT_STATUS_NOT_POWERED); + goto done; } - if (len != sizeof(*cp)) - return cmd_status(sk, index, mgmt_op, EINVAL); + /* + * Check for an existing ACL link, if present pair via + * HCI commands. + * + * If no ACL link is present, check for an LE link and if + * present, pair via the SMP engine. + * + * If neither ACL nor LE links are present, fail with error. + */ + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, bdaddr); + if (!conn) { + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr); + if (!conn) { + err = cmd_status(sk, index, mgmt_op, + MGMT_STATUS_NOT_CONNECTED); + goto done; + } - hdev = hci_dev_get(index); - if (!hdev) - return cmd_status(sk, index, mgmt_op, ENODEV); + /* Continue with pairing via SMP */ + err = smp_user_confirm_reply(conn, mgmt_op, passkey); - hci_dev_lock_bh(hdev); + if (!err) + err = cmd_status(sk, index, mgmt_op, + MGMT_STATUS_SUCCESS); + else + err = cmd_status(sk, index, mgmt_op, + MGMT_STATUS_FAILED); - if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, index, mgmt_op, ENETDOWN); - goto failed; + goto done; } - cmd = mgmt_pending_add(sk, mgmt_op, index, data, len); + cmd = mgmt_pending_add(sk, mgmt_op, hdev, bdaddr, sizeof(*bdaddr)); if (!cmd) { err = -ENOMEM; - goto failed; + goto done; } - err = hci_send_cmd(hdev, hci_op, sizeof(cp->bdaddr), &cp->bdaddr); + /* Continue with pairing via HCI */ + if (hci_op == HCI_OP_USER_PASSKEY_REPLY) { + struct hci_cp_user_passkey_reply cp; + + bacpy(&cp.bdaddr, bdaddr); + cp.passkey = passkey; + err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp); + } else + err = hci_send_cmd(hdev, hci_op, sizeof(*bdaddr), bdaddr); + if (err < 0) mgmt_pending_remove(cmd); -failed: - hci_dev_unlock_bh(hdev); +done: + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } +static int user_confirm_reply(struct sock *sk, u16 index, void *data, u16 len) +{ + struct mgmt_cp_user_confirm_reply *cp = (void *) data; + + BT_DBG(""); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_USER_CONFIRM_REPLY, + MGMT_STATUS_INVALID_PARAMS); + + return user_pairing_resp(sk, index, &cp->bdaddr, + MGMT_OP_USER_CONFIRM_REPLY, + HCI_OP_USER_CONFIRM_REPLY, 0); +} + +static int user_confirm_neg_reply(struct sock *sk, u16 index, void *data, + u16 len) +{ + struct mgmt_cp_user_confirm_neg_reply *cp = data; + + BT_DBG(""); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_USER_CONFIRM_NEG_REPLY, + MGMT_STATUS_INVALID_PARAMS); + + return user_pairing_resp(sk, index, &cp->bdaddr, + MGMT_OP_USER_CONFIRM_NEG_REPLY, + HCI_OP_USER_CONFIRM_NEG_REPLY, 0); +} + +static int user_passkey_reply(struct sock *sk, u16 index, void *data, u16 len) +{ + struct mgmt_cp_user_passkey_reply *cp = (void *) data; + + BT_DBG(""); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_USER_PASSKEY_REPLY, + EINVAL); + + return user_pairing_resp(sk, index, &cp->bdaddr, + MGMT_OP_USER_PASSKEY_REPLY, + HCI_OP_USER_PASSKEY_REPLY, cp->passkey); +} + +static int user_passkey_neg_reply(struct sock *sk, u16 index, void *data, + u16 len) +{ + struct mgmt_cp_user_passkey_neg_reply *cp = (void *) data; + + BT_DBG(""); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_USER_PASSKEY_NEG_REPLY, + EINVAL); + + return user_pairing_resp(sk, index, &cp->bdaddr, + MGMT_OP_USER_PASSKEY_NEG_REPLY, + HCI_OP_USER_PASSKEY_NEG_REPLY, 0); +} + static int set_local_name(struct sock *sk, u16 index, unsigned char *data, u16 len) { @@ -1472,15 +1755,17 @@ static int set_local_name(struct sock *sk, u16 index, unsigned char *data, BT_DBG(""); if (len != sizeof(*mgmt_cp)) - return cmd_status(sk, index, MGMT_OP_SET_LOCAL_NAME, EINVAL); + return cmd_status(sk, index, MGMT_OP_SET_LOCAL_NAME, + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_SET_LOCAL_NAME, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_LOCAL_NAME, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); - cmd = mgmt_pending_add(sk, MGMT_OP_SET_LOCAL_NAME, index, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_LOCAL_NAME, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -1493,7 +1778,7 @@ static int set_local_name(struct sock *sk, u16 index, unsigned char *data, mgmt_pending_remove(cmd); failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1510,28 +1795,29 @@ static int read_local_oob_data(struct sock *sk, u16 index) hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_READ_LOCAL_OOB_DATA, - ENODEV); + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { err = cmd_status(sk, index, MGMT_OP_READ_LOCAL_OOB_DATA, - ENETDOWN); + MGMT_STATUS_NOT_POWERED); goto unlock; } if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) { err = cmd_status(sk, index, MGMT_OP_READ_LOCAL_OOB_DATA, - EOPNOTSUPP); + MGMT_STATUS_NOT_SUPPORTED); goto unlock; } - if (mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, index)) { - err = cmd_status(sk, index, MGMT_OP_READ_LOCAL_OOB_DATA, EBUSY); + if (mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev)) { + err = cmd_status(sk, index, MGMT_OP_READ_LOCAL_OOB_DATA, + MGMT_STATUS_BUSY); goto unlock; } - cmd = mgmt_pending_add(sk, MGMT_OP_READ_LOCAL_OOB_DATA, index, NULL, 0); + cmd = mgmt_pending_add(sk, MGMT_OP_READ_LOCAL_OOB_DATA, hdev, NULL, 0); if (!cmd) { err = -ENOMEM; goto unlock; @@ -1542,7 +1828,7 @@ static int read_local_oob_data(struct sock *sk, u16 index) mgmt_pending_remove(cmd); unlock: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1559,24 +1845,25 @@ static int add_remote_oob_data(struct sock *sk, u16 index, unsigned char *data, if (len != sizeof(*cp)) return cmd_status(sk, index, MGMT_OP_ADD_REMOTE_OOB_DATA, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_ADD_REMOTE_OOB_DATA, - ENODEV); + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); err = hci_add_remote_oob_data(hdev, &cp->bdaddr, cp->hash, cp->randomizer); if (err < 0) - err = cmd_status(sk, index, MGMT_OP_ADD_REMOTE_OOB_DATA, -err); + err = cmd_status(sk, index, MGMT_OP_ADD_REMOTE_OOB_DATA, + MGMT_STATUS_FAILED); else err = cmd_complete(sk, index, MGMT_OP_ADD_REMOTE_OOB_DATA, NULL, 0); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1593,62 +1880,68 @@ static int remove_remote_oob_data(struct sock *sk, u16 index, if (len != sizeof(*cp)) return cmd_status(sk, index, MGMT_OP_REMOVE_REMOTE_OOB_DATA, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_REMOVE_REMOTE_OOB_DATA, - ENODEV); + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); err = hci_remove_remote_oob_data(hdev, &cp->bdaddr); if (err < 0) err = cmd_status(sk, index, MGMT_OP_REMOVE_REMOTE_OOB_DATA, - -err); + MGMT_STATUS_INVALID_PARAMS); else err = cmd_complete(sk, index, MGMT_OP_REMOVE_REMOTE_OOB_DATA, NULL, 0); - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } -static int start_discovery(struct sock *sk, u16 index) +static int start_discovery(struct sock *sk, u16 index, + unsigned char *data, u16 len) { - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - struct hci_cp_inquiry cp; + struct mgmt_cp_start_discovery *cp = (void *) data; struct pending_cmd *cmd; struct hci_dev *hdev; int err; BT_DBG("hci%u", index); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_START_DISCOVERY, ENODEV); + return cmd_status(sk, index, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); - cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, index, NULL, 0); + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, index, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_NOT_POWERED); + goto failed; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, NULL, 0); if (!cmd) { err = -ENOMEM; goto failed; } - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, 3); - cp.length = 0x08; - cp.num_rsp = 0x00; - - err = hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); + err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); if (err < 0) mgmt_pending_remove(cmd); failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1664,22 +1957,23 @@ static int stop_discovery(struct sock *sk, u16 index) hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, index, MGMT_OP_STOP_DISCOVERY, ENODEV); + return cmd_status(sk, index, MGMT_OP_STOP_DISCOVERY, + MGMT_STATUS_INVALID_PARAMS); - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); - cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, index, NULL, 0); + cmd = mgmt_pending_add(sk, MGMT_OP_STOP_DISCOVERY, hdev, NULL, 0); if (!cmd) { err = -ENOMEM; goto failed; } - err = hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL); + err = hci_cancel_inquiry(hdev); if (err < 0) mgmt_pending_remove(cmd); failed: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1689,29 +1983,31 @@ static int block_device(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; - struct mgmt_cp_block_device *cp; + struct mgmt_cp_block_device *cp = (void *) data; int err; BT_DBG("hci%u", index); - cp = (void *) data; - if (len != sizeof(*cp)) return cmd_status(sk, index, MGMT_OP_BLOCK_DEVICE, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_BLOCK_DEVICE, - ENODEV); + MGMT_STATUS_INVALID_PARAMS); - err = hci_blacklist_add(hdev, &cp->bdaddr); + hci_dev_lock(hdev); + err = hci_blacklist_add(hdev, &cp->bdaddr); if (err < 0) - err = cmd_status(sk, index, MGMT_OP_BLOCK_DEVICE, -err); + err = cmd_status(sk, index, MGMT_OP_BLOCK_DEVICE, + MGMT_STATUS_FAILED); else err = cmd_complete(sk, index, MGMT_OP_BLOCK_DEVICE, NULL, 0); + + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1721,29 +2017,88 @@ static int unblock_device(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; - struct mgmt_cp_unblock_device *cp; + struct mgmt_cp_unblock_device *cp = (void *) data; int err; BT_DBG("hci%u", index); - cp = (void *) data; - if (len != sizeof(*cp)) return cmd_status(sk, index, MGMT_OP_UNBLOCK_DEVICE, - EINVAL); + MGMT_STATUS_INVALID_PARAMS); hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_UNBLOCK_DEVICE, - ENODEV); + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); err = hci_blacklist_del(hdev, &cp->bdaddr); if (err < 0) - err = cmd_status(sk, index, MGMT_OP_UNBLOCK_DEVICE, -err); + err = cmd_status(sk, index, MGMT_OP_UNBLOCK_DEVICE, + MGMT_STATUS_INVALID_PARAMS); else err = cmd_complete(sk, index, MGMT_OP_UNBLOCK_DEVICE, NULL, 0); + + hci_dev_unlock(hdev); + hci_dev_put(hdev); + + return err; +} + +static int set_fast_connectable(struct sock *sk, u16 index, + unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_mode *cp = (void *) data; + struct hci_cp_write_page_scan_activity acp; + u8 type; + int err; + + BT_DBG("hci%u", index); + + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + + hdev = hci_dev_get(index); + if (!hdev) + return cmd_status(sk, index, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + if (cp->val) { + type = PAGE_SCAN_TYPE_INTERLACED; + acp.interval = 0x0024; /* 22.5 msec page scan interval */ + } else { + type = PAGE_SCAN_TYPE_STANDARD; /* default */ + acp.interval = 0x0800; /* default 1.28 sec page scan */ + } + + acp.window = 0x0012; /* default 11.25 msec page scan window */ + + err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, + sizeof(acp), &acp); + if (err < 0) { + err = cmd_status(sk, index, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_FAILED); + goto done; + } + + err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); + if (err < 0) { + err = cmd_status(sk, index, MGMT_OP_SET_FAST_CONNECTABLE, + MGMT_STATUS_FAILED); + goto done; + } + + err = cmd_complete(sk, index, MGMT_OP_SET_FAST_CONNECTABLE, + NULL, 0); +done: + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; @@ -1799,6 +2154,10 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_SET_CONNECTABLE: err = set_connectable(sk, index, buf + sizeof(*hdr), len); break; + case MGMT_OP_SET_FAST_CONNECTABLE: + err = set_fast_connectable(sk, index, buf + sizeof(*hdr), + len); + break; case MGMT_OP_SET_PAIRABLE: err = set_pairable(sk, index, buf + sizeof(*hdr), len); break; @@ -1811,14 +2170,11 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_SET_DEV_CLASS: err = set_dev_class(sk, index, buf + sizeof(*hdr), len); break; - case MGMT_OP_SET_SERVICE_CACHE: - err = set_service_cache(sk, index, buf + sizeof(*hdr), len); - break; - case MGMT_OP_LOAD_KEYS: - err = load_keys(sk, index, buf + sizeof(*hdr), len); + case MGMT_OP_LOAD_LINK_KEYS: + err = load_link_keys(sk, index, buf + sizeof(*hdr), len); break; - case MGMT_OP_REMOVE_KEY: - err = remove_key(sk, index, buf + sizeof(*hdr), len); + case MGMT_OP_REMOVE_KEYS: + err = remove_keys(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_DISCONNECT: err = disconnect(sk, index, buf + sizeof(*hdr), len); @@ -1839,10 +2195,18 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) err = pair_device(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_USER_CONFIRM_REPLY: - err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 1); + err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_USER_CONFIRM_NEG_REPLY: - err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 0); + err = user_confirm_neg_reply(sk, index, buf + sizeof(*hdr), + len); + break; + case MGMT_OP_USER_PASSKEY_REPLY: + err = user_passkey_reply(sk, index, buf + sizeof(*hdr), len); + break; + case MGMT_OP_USER_PASSKEY_NEG_REPLY: + err = user_passkey_neg_reply(sk, index, buf + sizeof(*hdr), + len); break; case MGMT_OP_SET_LOCAL_NAME: err = set_local_name(sk, index, buf + sizeof(*hdr), len); @@ -1858,7 +2222,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) len); break; case MGMT_OP_START_DISCOVERY: - err = start_discovery(sk, index); + err = start_discovery(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_STOP_DISCOVERY: err = stop_discovery(sk, index); @@ -1871,7 +2235,8 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) break; default: BT_DBG("Unknown op %u", opcode); - err = cmd_status(sk, index, opcode, 0x01); + err = cmd_status(sk, index, opcode, + MGMT_STATUS_UNKNOWN_COMMAND); break; } @@ -1885,30 +2250,39 @@ done: return err; } -int mgmt_index_added(u16 index) +static void cmd_status_rsp(struct pending_cmd *cmd, void *data) +{ + u8 *status = data; + + cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); + mgmt_pending_remove(cmd); +} + +int mgmt_index_added(struct hci_dev *hdev) { - return mgmt_event(MGMT_EV_INDEX_ADDED, index, NULL, 0, NULL); + return mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL); } -int mgmt_index_removed(u16 index) +int mgmt_index_removed(struct hci_dev *hdev) { - return mgmt_event(MGMT_EV_INDEX_REMOVED, index, NULL, 0, NULL); + u8 status = ENODEV; + + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); + + return mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL); } struct cmd_lookup { u8 val; struct sock *sk; + struct hci_dev *hdev; }; -static void mode_rsp(struct pending_cmd *cmd, void *data) +static void settings_rsp(struct pending_cmd *cmd, void *data) { - struct mgmt_mode *cp = cmd->param; struct cmd_lookup *match = data; - if (cp->val != match->val) - return; - - send_mode_rsp(cmd->sk, cmd->opcode, cmd->index, cp->val); + send_settings_rsp(cmd->sk, cmd->opcode, match->hdev); list_del(&cmd->list); @@ -1920,17 +2294,23 @@ static void mode_rsp(struct pending_cmd *cmd, void *data) mgmt_pending_free(cmd); } -int mgmt_powered(u16 index, u8 powered) +int mgmt_powered(struct hci_dev *hdev, u8 powered) { - struct mgmt_mode ev; - struct cmd_lookup match = { powered, NULL }; + struct cmd_lookup match = { powered, NULL, hdev }; + __le32 ev; int ret; - mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, mode_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + + if (!powered) { + u8 status = ENETDOWN; + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); + } - ev.val = powered; + ev = cpu_to_le32(get_current_settings(hdev)); - ret = mgmt_event(MGMT_EV_POWERED, index, &ev, sizeof(ev), match.sk); + ret = mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), + match.sk); if (match.sk) sock_put(match.sk); @@ -1938,36 +2318,36 @@ int mgmt_powered(u16 index, u8 powered) return ret; } -int mgmt_discoverable(u16 index, u8 discoverable) +int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable) { - struct mgmt_mode ev; - struct cmd_lookup match = { discoverable, NULL }; + struct cmd_lookup match = { discoverable, NULL, hdev }; + __le32 ev; int ret; - mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, mode_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, hdev, settings_rsp, &match); - ev.val = discoverable; + ev = cpu_to_le32(get_current_settings(hdev)); - ret = mgmt_event(MGMT_EV_DISCOVERABLE, index, &ev, sizeof(ev), + ret = mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), match.sk); - if (match.sk) sock_put(match.sk); return ret; } -int mgmt_connectable(u16 index, u8 connectable) +int mgmt_connectable(struct hci_dev *hdev, u8 connectable) { - struct mgmt_mode ev; - struct cmd_lookup match = { connectable, NULL }; + __le32 ev; + struct cmd_lookup match = { connectable, NULL, hdev }; int ret; - mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, mode_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, settings_rsp, + &match); - ev.val = connectable; + ev = cpu_to_le32(get_current_settings(hdev)); - ret = mgmt_event(MGMT_EV_CONNECTABLE, index, &ev, sizeof(ev), match.sk); + ret = mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), match.sk); if (match.sk) sock_put(match.sk); @@ -1975,39 +2355,46 @@ int mgmt_connectable(u16 index, u8 connectable) return ret; } -int mgmt_new_key(u16 index, struct link_key *key, u8 persistent) +int mgmt_write_scan_failed(struct hci_dev *hdev, u8 scan, u8 status) { - struct mgmt_ev_new_key *ev; - int err, total; + u8 mgmt_err = mgmt_status(status); - total = sizeof(struct mgmt_ev_new_key) + key->dlen; - ev = kzalloc(total, GFP_ATOMIC); - if (!ev) - return -ENOMEM; + if (scan & SCAN_PAGE) + mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, + cmd_status_rsp, &mgmt_err); - bacpy(&ev->key.bdaddr, &key->bdaddr); - ev->key.type = key->type; - memcpy(ev->key.val, key->val, 16); - ev->key.pin_len = key->pin_len; - ev->key.dlen = key->dlen; - ev->store_hint = persistent; + if (scan & SCAN_INQUIRY) + mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, hdev, + cmd_status_rsp, &mgmt_err); - memcpy(ev->key.data, key->data, key->dlen); + return 0; +} + +int mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, + u8 persistent) +{ + struct mgmt_ev_new_link_key ev; - err = mgmt_event(MGMT_EV_NEW_KEY, index, ev, total, NULL); + memset(&ev, 0, sizeof(ev)); - kfree(ev); + ev.store_hint = persistent; + bacpy(&ev.key.bdaddr, &key->bdaddr); + ev.key.type = key->type; + memcpy(ev.key.val, key->val, 16); + ev.key.pin_len = key->pin_len; - return err; + return mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL); } -int mgmt_connected(u16 index, bdaddr_t *bdaddr) +int mgmt_connected(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, + u8 addr_type) { - struct mgmt_ev_connected ev; + struct mgmt_addr_info ev; bacpy(&ev.bdaddr, bdaddr); + ev.type = link_to_mgmt(link_type, addr_type); - return mgmt_event(MGMT_EV_CONNECTED, index, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_CONNECTED, hdev, &ev, sizeof(ev), NULL); } static void disconnect_rsp(struct pending_cmd *cmd, void *data) @@ -2017,6 +2404,7 @@ static void disconnect_rsp(struct pending_cmd *cmd, void *data) struct mgmt_rp_disconnect rp; bacpy(&rp.bdaddr, &cp->bdaddr); + rp.status = 0; cmd_complete(cmd->sk, cmd->index, MGMT_OP_DISCONNECT, &rp, sizeof(rp)); @@ -2026,75 +2414,110 @@ static void disconnect_rsp(struct pending_cmd *cmd, void *data) mgmt_pending_remove(cmd); } -int mgmt_disconnected(u16 index, bdaddr_t *bdaddr) +static void remove_keys_rsp(struct pending_cmd *cmd, void *data) { - struct mgmt_ev_disconnected ev; + u8 *status = data; + struct mgmt_cp_remove_keys *cp = cmd->param; + struct mgmt_rp_remove_keys rp; + + memset(&rp, 0, sizeof(rp)); + bacpy(&rp.bdaddr, &cp->bdaddr); + if (status != NULL) + rp.status = *status; + + cmd_complete(cmd->sk, cmd->index, MGMT_OP_REMOVE_KEYS, &rp, + sizeof(rp)); + + mgmt_pending_remove(cmd); +} + +int mgmt_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, + u8 addr_type) +{ + struct mgmt_addr_info ev; struct sock *sk = NULL; int err; - mgmt_pending_foreach(MGMT_OP_DISCONNECT, index, disconnect_rsp, &sk); + mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk); bacpy(&ev.bdaddr, bdaddr); + ev.type = link_to_mgmt(link_type, addr_type); - err = mgmt_event(MGMT_EV_DISCONNECTED, index, &ev, sizeof(ev), sk); + err = mgmt_event(MGMT_EV_DISCONNECTED, hdev, &ev, sizeof(ev), sk); if (sk) sock_put(sk); + mgmt_pending_foreach(MGMT_OP_REMOVE_KEYS, hdev, remove_keys_rsp, NULL); + return err; } -int mgmt_disconnect_failed(u16 index) +int mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status) { struct pending_cmd *cmd; + u8 mgmt_err = mgmt_status(status); int err; - cmd = mgmt_pending_find(MGMT_OP_DISCONNECT, index); + cmd = mgmt_pending_find(MGMT_OP_DISCONNECT, hdev); if (!cmd) return -ENOENT; - err = cmd_status(cmd->sk, index, MGMT_OP_DISCONNECT, EIO); + if (bdaddr) { + struct mgmt_rp_disconnect rp; + + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; + + err = cmd_complete(cmd->sk, cmd->index, MGMT_OP_DISCONNECT, + &rp, sizeof(rp)); + } else + err = cmd_status(cmd->sk, hdev->id, MGMT_OP_DISCONNECT, + mgmt_err); mgmt_pending_remove(cmd); return err; } -int mgmt_connect_failed(u16 index, bdaddr_t *bdaddr, u8 status) +int mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, + u8 addr_type, u8 status) { struct mgmt_ev_connect_failed ev; - bacpy(&ev.bdaddr, bdaddr); - ev.status = status; + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = link_to_mgmt(link_type, addr_type); + ev.status = mgmt_status(status); - return mgmt_event(MGMT_EV_CONNECT_FAILED, index, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_CONNECT_FAILED, hdev, &ev, sizeof(ev), NULL); } -int mgmt_pin_code_request(u16 index, bdaddr_t *bdaddr, u8 secure) +int mgmt_pin_code_request(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 secure) { struct mgmt_ev_pin_code_request ev; bacpy(&ev.bdaddr, bdaddr); ev.secure = secure; - return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, index, &ev, sizeof(ev), + return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, hdev, &ev, sizeof(ev), NULL); } -int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +int mgmt_pin_code_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 status) { struct pending_cmd *cmd; struct mgmt_rp_pin_code_reply rp; int err; - cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, index); + cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, hdev); if (!cmd) return -ENOENT; bacpy(&rp.bdaddr, bdaddr); - rp.status = status; + rp.status = mgmt_status(status); - err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_REPLY, &rp, + err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_PIN_CODE_REPLY, &rp, sizeof(rp)); mgmt_pending_remove(cmd); @@ -2102,20 +2525,21 @@ int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) return err; } -int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +int mgmt_pin_code_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 status) { struct pending_cmd *cmd; struct mgmt_rp_pin_code_reply rp; int err; - cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, index); + cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, hdev); if (!cmd) return -ENOENT; bacpy(&rp.bdaddr, bdaddr); - rp.status = status; + rp.status = mgmt_status(status); - err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, &rp, + err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_PIN_CODE_NEG_REPLY, &rp, sizeof(rp)); mgmt_pending_remove(cmd); @@ -2123,97 +2547,119 @@ int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) return err; } -int mgmt_user_confirm_request(u16 index, bdaddr_t *bdaddr, __le32 value, - u8 confirm_hint) +int mgmt_user_confirm_request(struct hci_dev *hdev, bdaddr_t *bdaddr, + __le32 value, u8 confirm_hint) { struct mgmt_ev_user_confirm_request ev; - BT_DBG("hci%u", index); + BT_DBG("%s", hdev->name); bacpy(&ev.bdaddr, bdaddr); ev.confirm_hint = confirm_hint; put_unaligned_le32(value, &ev.value); - return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, index, &ev, sizeof(ev), + return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, hdev, &ev, sizeof(ev), + NULL); +} + +int mgmt_user_passkey_request(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct mgmt_ev_user_passkey_request ev; + + BT_DBG("%s", hdev->name); + + bacpy(&ev.bdaddr, bdaddr); + + return mgmt_event(MGMT_EV_USER_PASSKEY_REQUEST, hdev, &ev, sizeof(ev), NULL); } -static int confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status, - u8 opcode) +static int user_pairing_resp_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 status, u8 opcode) { struct pending_cmd *cmd; struct mgmt_rp_user_confirm_reply rp; int err; - cmd = mgmt_pending_find(opcode, index); + cmd = mgmt_pending_find(opcode, hdev); if (!cmd) return -ENOENT; bacpy(&rp.bdaddr, bdaddr); - rp.status = status; - err = cmd_complete(cmd->sk, index, opcode, &rp, sizeof(rp)); + rp.status = mgmt_status(status); + err = cmd_complete(cmd->sk, hdev->id, opcode, &rp, sizeof(rp)); mgmt_pending_remove(cmd); return err; } -int mgmt_user_confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +int mgmt_user_confirm_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 status) { - return confirm_reply_complete(index, bdaddr, status, + return user_pairing_resp_complete(hdev, bdaddr, status, MGMT_OP_USER_CONFIRM_REPLY); } -int mgmt_user_confirm_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +int mgmt_user_confirm_neg_reply_complete(struct hci_dev *hdev, + bdaddr_t *bdaddr, u8 status) { - return confirm_reply_complete(index, bdaddr, status, + return user_pairing_resp_complete(hdev, bdaddr, status, MGMT_OP_USER_CONFIRM_NEG_REPLY); } -int mgmt_auth_failed(u16 index, bdaddr_t *bdaddr, u8 status) +int mgmt_user_passkey_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 status) +{ + return user_pairing_resp_complete(hdev, bdaddr, status, + MGMT_OP_USER_PASSKEY_REPLY); +} + +int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, + bdaddr_t *bdaddr, u8 status) +{ + return user_pairing_resp_complete(hdev, bdaddr, status, + MGMT_OP_USER_PASSKEY_NEG_REPLY); +} + +int mgmt_auth_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 status) { struct mgmt_ev_auth_failed ev; bacpy(&ev.bdaddr, bdaddr); - ev.status = status; + ev.status = mgmt_status(status); - return mgmt_event(MGMT_EV_AUTH_FAILED, index, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_AUTH_FAILED, hdev, &ev, sizeof(ev), NULL); } -int mgmt_set_local_name_complete(u16 index, u8 *name, u8 status) +int mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) { struct pending_cmd *cmd; - struct hci_dev *hdev; struct mgmt_cp_set_local_name ev; int err; memset(&ev, 0, sizeof(ev)); memcpy(ev.name, name, HCI_MAX_NAME_LENGTH); - cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, index); + cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); if (!cmd) goto send_event; if (status) { - err = cmd_status(cmd->sk, index, MGMT_OP_SET_LOCAL_NAME, EIO); + err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, + mgmt_status(status)); goto failed; } - hdev = hci_dev_get(index); - if (hdev) { - hci_dev_lock_bh(hdev); - update_eir(hdev); - hci_dev_unlock_bh(hdev); - hci_dev_put(hdev); - } + update_eir(hdev); - err = cmd_complete(cmd->sk, index, MGMT_OP_SET_LOCAL_NAME, &ev, + err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, &ev, sizeof(ev)); if (err < 0) goto failed; send_event: - err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, index, &ev, sizeof(ev), + err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), cmd ? cmd->sk : NULL); failed: @@ -2222,29 +2668,31 @@ failed: return err; } -int mgmt_read_local_oob_data_reply_complete(u16 index, u8 *hash, u8 *randomizer, - u8 status) +int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash, + u8 *randomizer, u8 status) { struct pending_cmd *cmd; int err; - BT_DBG("hci%u status %u", index, status); + BT_DBG("%s status %u", hdev->name, status); - cmd = mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, index); + cmd = mgmt_pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev); if (!cmd) return -ENOENT; if (status) { - err = cmd_status(cmd->sk, index, MGMT_OP_READ_LOCAL_OOB_DATA, - EIO); + err = cmd_status(cmd->sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_DATA, + mgmt_status(status)); } else { struct mgmt_rp_read_local_oob_data rp; memcpy(rp.hash, hash, sizeof(rp.hash)); memcpy(rp.randomizer, randomizer, sizeof(rp.randomizer)); - err = cmd_complete(cmd->sk, index, MGMT_OP_READ_LOCAL_OOB_DATA, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_DATA, + &rp, sizeof(rp)); } mgmt_pending_remove(cmd); @@ -2252,24 +2700,27 @@ int mgmt_read_local_oob_data_reply_complete(u16 index, u8 *hash, u8 *randomizer, return err; } -int mgmt_device_found(u16 index, bdaddr_t *bdaddr, u8 *dev_class, s8 rssi, - u8 *eir) +int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, + u8 addr_type, u8 *dev_class, s8 rssi, u8 *eir) { struct mgmt_ev_device_found ev; memset(&ev, 0, sizeof(ev)); - bacpy(&ev.bdaddr, bdaddr); - memcpy(ev.dev_class, dev_class, sizeof(ev.dev_class)); + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = link_to_mgmt(link_type, addr_type); ev.rssi = rssi; if (eir) memcpy(ev.eir, eir, sizeof(ev.eir)); - return mgmt_event(MGMT_EV_DEVICE_FOUND, index, &ev, sizeof(ev), NULL); + if (dev_class) + memcpy(ev.dev_class, dev_class, sizeof(ev.dev_class)); + + return mgmt_event(MGMT_EV_DEVICE_FOUND, hdev, &ev, sizeof(ev), NULL); } -int mgmt_remote_name(u16 index, bdaddr_t *bdaddr, u8 *name) +int mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *name) { struct mgmt_ev_remote_name ev; @@ -2278,11 +2729,79 @@ int mgmt_remote_name(u16 index, bdaddr_t *bdaddr, u8 *name) bacpy(&ev.bdaddr, bdaddr); memcpy(ev.name, name, HCI_MAX_NAME_LENGTH); - return mgmt_event(MGMT_EV_REMOTE_NAME, index, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_REMOTE_NAME, hdev, &ev, sizeof(ev), NULL); +} + +int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + int err; + + cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); + if (!cmd) + return -ENOENT; + + err = cmd_status(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status)); + mgmt_pending_remove(cmd); + + return err; +} + +int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + int err; + + cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); + if (!cmd) + return -ENOENT; + + err = cmd_status(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status)); + mgmt_pending_remove(cmd); + + return err; } -int mgmt_discovering(u16 index, u8 discovering) +int mgmt_discovering(struct hci_dev *hdev, u8 discovering) { - return mgmt_event(MGMT_EV_DISCOVERING, index, &discovering, + struct pending_cmd *cmd; + + if (discovering) + cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); + else + cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); + + if (cmd != NULL) { + cmd_complete(cmd->sk, hdev->id, cmd->opcode, NULL, 0); + mgmt_pending_remove(cmd); + } + + return mgmt_event(MGMT_EV_DISCOVERING, hdev, &discovering, sizeof(discovering), NULL); } + +int mgmt_device_blocked(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct pending_cmd *cmd; + struct mgmt_ev_device_blocked ev; + + cmd = mgmt_pending_find(MGMT_OP_BLOCK_DEVICE, hdev); + + bacpy(&ev.bdaddr, bdaddr); + + return mgmt_event(MGMT_EV_DEVICE_BLOCKED, hdev, &ev, sizeof(ev), + cmd ? cmd->sk : NULL); +} + +int mgmt_device_unblocked(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct pending_cmd *cmd; + struct mgmt_ev_device_unblocked ev; + + cmd = mgmt_pending_find(MGMT_OP_UNBLOCK_DEVICE, hdev); + + bacpy(&ev.bdaddr, bdaddr); + + return mgmt_event(MGMT_EV_DEVICE_UNBLOCKED, hdev, &ev, sizeof(ev), + cmd ? cmd->sk : NULL); +} diff --git a/net/bluetooth/rfcomm/Kconfig b/net/bluetooth/rfcomm/Kconfig index 405a0e61e7dc..22e718b554e4 100644 --- a/net/bluetooth/rfcomm/Kconfig +++ b/net/bluetooth/rfcomm/Kconfig @@ -1,6 +1,6 @@ config BT_RFCOMM tristate "RFCOMM protocol support" - depends on BT && BT_L2CAP + depends on BT help RFCOMM provides connection oriented stream transport. RFCOMM support is required for Dialup Networking, OBEX and other Bluetooth diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 5ba3f6df665c..501649bf5596 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -51,8 +51,8 @@ #define VERSION "1.11" -static int disable_cfc; -static int l2cap_ertm; +static bool disable_cfc; +static bool l2cap_ertm; static int channel_mtu = -1; static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; @@ -377,13 +377,11 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d) static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci) { struct rfcomm_dlc *d; - struct list_head *p; - list_for_each(p, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); + list_for_each_entry(d, &s->dlcs, list) if (d->dlci == dlci) return d; - } + return NULL; } @@ -751,7 +749,6 @@ void rfcomm_session_getaddr(struct rfcomm_session *s, bdaddr_t *src, bdaddr_t *d /* ---- RFCOMM frame sending ---- */ static int rfcomm_send_frame(struct rfcomm_session *s, u8 *data, int len) { - struct socket *sock = s->sock; struct kvec iv = { data, len }; struct msghdr msg; @@ -759,7 +756,14 @@ static int rfcomm_send_frame(struct rfcomm_session *s, u8 *data, int len) memset(&msg, 0, sizeof(msg)); - return kernel_sendmsg(sock, &msg, &iv, 1, len); + return kernel_sendmsg(s->sock, &msg, &iv, 1, len); +} + +static int rfcomm_send_cmd(struct rfcomm_session *s, struct rfcomm_cmd *cmd) +{ + BT_DBG("%p cmd %u", s, cmd->ctrl); + + return rfcomm_send_frame(s, (void *) cmd, sizeof(*cmd)); } static int rfcomm_send_sabm(struct rfcomm_session *s, u8 dlci) @@ -773,7 +777,7 @@ static int rfcomm_send_sabm(struct rfcomm_session *s, u8 dlci) cmd.len = __len8(0); cmd.fcs = __fcs2((u8 *) &cmd); - return rfcomm_send_frame(s, (void *) &cmd, sizeof(cmd)); + return rfcomm_send_cmd(s, &cmd); } static int rfcomm_send_ua(struct rfcomm_session *s, u8 dlci) @@ -787,7 +791,7 @@ static int rfcomm_send_ua(struct rfcomm_session *s, u8 dlci) cmd.len = __len8(0); cmd.fcs = __fcs2((u8 *) &cmd); - return rfcomm_send_frame(s, (void *) &cmd, sizeof(cmd)); + return rfcomm_send_cmd(s, &cmd); } static int rfcomm_send_disc(struct rfcomm_session *s, u8 dlci) @@ -801,7 +805,7 @@ static int rfcomm_send_disc(struct rfcomm_session *s, u8 dlci) cmd.len = __len8(0); cmd.fcs = __fcs2((u8 *) &cmd); - return rfcomm_send_frame(s, (void *) &cmd, sizeof(cmd)); + return rfcomm_send_cmd(s, &cmd); } static int rfcomm_queue_disc(struct rfcomm_dlc *d) @@ -837,7 +841,7 @@ static int rfcomm_send_dm(struct rfcomm_session *s, u8 dlci) cmd.len = __len8(0); cmd.fcs = __fcs2((u8 *) &cmd); - return rfcomm_send_frame(s, (void *) &cmd, sizeof(cmd)); + return rfcomm_send_cmd(s, &cmd); } static int rfcomm_send_nsc(struct rfcomm_session *s, int cr, u8 type) @@ -1146,6 +1150,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) if (list_empty(&s->dlcs)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); + rfcomm_session_clear_timer(s); } break; @@ -1802,6 +1807,11 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) continue; } + if (test_bit(RFCOMM_ENC_DROP, &d->flags)) { + __rfcomm_dlc_close(d, ECONNREFUSED); + continue; + } + if (test_and_clear_bit(RFCOMM_AUTH_ACCEPT, &d->flags)) { rfcomm_dlc_clear_timer(d); if (d->out) { @@ -1853,7 +1863,10 @@ static inline void rfcomm_process_rx(struct rfcomm_session *s) /* Get data directly from socket receive queue without copying it. */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - rfcomm_recv_frame(s, skb); + if (!skb_linearize(skb)) + rfcomm_recv_frame(s, skb); + else + kfree_skb(skb); } if (sk->sk_state == BT_CLOSED) { @@ -2074,7 +2087,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) if (test_and_clear_bit(RFCOMM_SEC_PENDING, &d->flags)) { rfcomm_dlc_clear_timer(d); if (status || encrypt == 0x00) { - __rfcomm_dlc_close(d, ECONNREFUSED); + set_bit(RFCOMM_ENC_DROP, &d->flags); continue; } } @@ -2085,7 +2098,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); continue; } else if (d->sec_level == BT_SECURITY_HIGH) { - __rfcomm_dlc_close(d, ECONNREFUSED); + set_bit(RFCOMM_ENC_DROP, &d->flags); continue; } } @@ -2112,15 +2125,13 @@ static struct hci_cb rfcomm_cb = { static int rfcomm_dlc_debugfs_show(struct seq_file *f, void *x) { struct rfcomm_session *s; - struct list_head *pp, *p; rfcomm_lock(); - list_for_each(p, &session_list) { - s = list_entry(p, struct rfcomm_session, list); - list_for_each(pp, &s->dlcs) { + list_for_each_entry(s, &session_list, list) { + struct rfcomm_dlc *d; + list_for_each_entry(d, &s->dlcs, list) { struct sock *sk = s->sock->sk; - struct rfcomm_dlc *d = list_entry(pp, struct rfcomm_dlc, list); seq_printf(f, "%s %s %ld %d %d %d %d\n", batostr(&bt_sk(sk)->src), diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 5417f6127323..f066678faeee 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -370,7 +370,7 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr goto done; } - write_lock_bh(&rfcomm_sk_list.lock); + write_lock(&rfcomm_sk_list.lock); if (sa->rc_channel && __rfcomm_get_sock_by_addr(sa->rc_channel, &sa->rc_bdaddr)) { err = -EADDRINUSE; @@ -381,7 +381,7 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr sk->sk_state = BT_BOUND; } - write_unlock_bh(&rfcomm_sk_list.lock); + write_unlock(&rfcomm_sk_list.lock); done: release_sock(sk); @@ -455,7 +455,7 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog) err = -EINVAL; - write_lock_bh(&rfcomm_sk_list.lock); + write_lock(&rfcomm_sk_list.lock); for (channel = 1; channel < 31; channel++) if (!__rfcomm_get_sock_by_addr(channel, src)) { @@ -464,7 +464,7 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog) break; } - write_unlock_bh(&rfcomm_sk_list.lock); + write_unlock(&rfcomm_sk_list.lock); if (err < 0) goto done; @@ -600,6 +600,8 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, break; } + skb->priority = sk->sk_priority; + err = rfcomm_dlc_send(d, skb); if (err < 0) { kfree_skb(skb); @@ -980,7 +982,7 @@ static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) struct sock *sk; struct hlist_node *node; - read_lock_bh(&rfcomm_sk_list.lock); + read_lock(&rfcomm_sk_list.lock); sk_for_each(sk, node, &rfcomm_sk_list.head) { seq_printf(f, "%s %s %d %d\n", @@ -989,7 +991,7 @@ static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) sk->sk_state, rfcomm_pi(sk)->channel); } - read_unlock_bh(&rfcomm_sk_list.lock); + read_unlock(&rfcomm_sk_list.lock); return 0; } diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index c258796313e0..a2d4f5122a6a 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -34,6 +34,7 @@ #include <linux/capability.h> #include <linux/slab.h> #include <linux/skbuff.h> +#include <linux/workqueue.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -65,7 +66,7 @@ struct rfcomm_dev { struct rfcomm_dlc *dlc; struct tty_struct *tty; wait_queue_head_t wait; - struct tasklet_struct wakeup_task; + struct work_struct wakeup_task; struct device *tty_dev; @@ -75,13 +76,13 @@ struct rfcomm_dev { }; static LIST_HEAD(rfcomm_dev_list); -static DEFINE_RWLOCK(rfcomm_dev_lock); +static DEFINE_SPINLOCK(rfcomm_dev_lock); static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb); static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err); static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig); -static void rfcomm_tty_wakeup(unsigned long arg); +static void rfcomm_tty_wakeup(struct work_struct *work); /* ---- Device functions ---- */ static void rfcomm_dev_destruct(struct rfcomm_dev *dev) @@ -133,13 +134,10 @@ static inline void rfcomm_dev_put(struct rfcomm_dev *dev) static struct rfcomm_dev *__rfcomm_dev_get(int id) { struct rfcomm_dev *dev; - struct list_head *p; - list_for_each(p, &rfcomm_dev_list) { - dev = list_entry(p, struct rfcomm_dev, list); + list_for_each_entry(dev, &rfcomm_dev_list, list) if (dev->id == id) return dev; - } return NULL; } @@ -148,7 +146,7 @@ static inline struct rfcomm_dev *rfcomm_dev_get(int id) { struct rfcomm_dev *dev; - read_lock(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); dev = __rfcomm_dev_get(id); @@ -159,7 +157,7 @@ static inline struct rfcomm_dev *rfcomm_dev_get(int id) rfcomm_dev_hold(dev); } - read_unlock(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); return dev; } @@ -197,7 +195,7 @@ static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL); static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) { - struct rfcomm_dev *dev; + struct rfcomm_dev *dev, *entry; struct list_head *head = &rfcomm_dev_list, *p; int err = 0; @@ -207,13 +205,13 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) if (!dev) return -ENOMEM; - write_lock_bh(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); if (req->dev_id < 0) { dev->id = 0; - list_for_each(p, &rfcomm_dev_list) { - if (list_entry(p, struct rfcomm_dev, list)->id != dev->id) + list_for_each_entry(entry, &rfcomm_dev_list, list) { + if (entry->id != dev->id) break; dev->id++; @@ -222,9 +220,7 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) } else { dev->id = req->dev_id; - list_for_each(p, &rfcomm_dev_list) { - struct rfcomm_dev *entry = list_entry(p, struct rfcomm_dev, list); - + list_for_each_entry(entry, &rfcomm_dev_list, list) { if (entry->id == dev->id) { err = -EADDRINUSE; goto out; @@ -257,7 +253,7 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) atomic_set(&dev->opened, 0); init_waitqueue_head(&dev->wait); - tasklet_init(&dev->wakeup_task, rfcomm_tty_wakeup, (unsigned long) dev); + INIT_WORK(&dev->wakeup_task, rfcomm_tty_wakeup); skb_queue_head_init(&dev->pending); @@ -294,7 +290,7 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) __module_get(THIS_MODULE); out: - write_unlock_bh(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); if (err < 0) goto free; @@ -331,9 +327,9 @@ static void rfcomm_dev_del(struct rfcomm_dev *dev) if (atomic_read(&dev->opened) > 0) return; - write_lock_bh(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); list_del_init(&dev->list); - write_unlock_bh(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); rfcomm_dev_put(dev); } @@ -351,7 +347,7 @@ static void rfcomm_wfree(struct sk_buff *skb) struct rfcomm_dev *dev = (void *) skb->sk; atomic_sub(skb->truesize, &dev->wmem_alloc); if (test_bit(RFCOMM_TTY_ATTACHED, &dev->flags)) - tasklet_schedule(&dev->wakeup_task); + queue_work(system_nrt_wq, &dev->wakeup_task); rfcomm_dev_put(dev); } @@ -455,9 +451,9 @@ static int rfcomm_release_dev(void __user *arg) static int rfcomm_get_dev_list(void __user *arg) { + struct rfcomm_dev *dev; struct rfcomm_dev_list_req *dl; struct rfcomm_dev_info *di; - struct list_head *p; int n = 0, size, err; u16 dev_num; @@ -477,10 +473,9 @@ static int rfcomm_get_dev_list(void __user *arg) di = dl->dev_info; - read_lock_bh(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); - list_for_each(p, &rfcomm_dev_list) { - struct rfcomm_dev *dev = list_entry(p, struct rfcomm_dev, list); + list_for_each_entry(dev, &rfcomm_dev_list, list) { if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) continue; (di + n)->id = dev->id; @@ -493,7 +488,7 @@ static int rfcomm_get_dev_list(void __user *arg) break; } - read_unlock_bh(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); dl->dev_num = n; size = sizeof(*dl) + n * sizeof(*di); @@ -635,9 +630,10 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) } /* ---- TTY functions ---- */ -static void rfcomm_tty_wakeup(unsigned long arg) +static void rfcomm_tty_wakeup(struct work_struct *work) { - struct rfcomm_dev *dev = (void *) arg; + struct rfcomm_dev *dev = container_of(work, struct rfcomm_dev, + wakeup_task); struct tty_struct *tty = dev->tty; if (!tty) return; @@ -762,7 +758,7 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) rfcomm_dlc_close(dev->dlc, 0); clear_bit(RFCOMM_TTY_ATTACHED, &dev->flags); - tasklet_kill(&dev->wakeup_task); + cancel_work_sync(&dev->wakeup_task); rfcomm_dlc_lock(dev->dlc); tty->driver_data = NULL; @@ -770,9 +766,9 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) rfcomm_dlc_unlock(dev->dlc); if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) { - write_lock_bh(&rfcomm_dev_lock); + spin_lock(&rfcomm_dev_lock); list_del_init(&dev->list); - write_unlock_bh(&rfcomm_dev_lock); + spin_unlock(&rfcomm_dev_lock); rfcomm_dev_put(dev); } @@ -1155,9 +1151,11 @@ static const struct tty_operations rfcomm_ops = { int __init rfcomm_init_ttys(void) { + int error; + rfcomm_tty_driver = alloc_tty_driver(RFCOMM_TTY_PORTS); if (!rfcomm_tty_driver) - return -1; + return -ENOMEM; rfcomm_tty_driver->owner = THIS_MODULE; rfcomm_tty_driver->driver_name = "rfcomm"; @@ -1172,10 +1170,11 @@ int __init rfcomm_init_ttys(void) rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON; tty_set_operations(rfcomm_tty_driver, &rfcomm_ops); - if (tty_register_driver(rfcomm_tty_driver)) { + error = tty_register_driver(rfcomm_tty_driver); + if (error) { BT_ERR("Can't register RFCOMM TTY driver"); put_tty_driver(rfcomm_tty_driver); - return -1; + return error; } BT_INFO("RFCOMM TTY layer initialized"); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index a324b009e34b..8bf26d1bc5c1 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -51,7 +51,7 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/sco.h> -static int disable_esco; +static bool disable_esco; static const struct proto_ops sco_sock_ops; @@ -189,7 +189,7 @@ static int sco_connect(struct sock *sk) if (!hdev) return -EHOSTUNREACH; - hci_dev_lock_bh(hdev); + hci_dev_lock(hdev); if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; @@ -225,7 +225,7 @@ static int sco_connect(struct sock *sk) } done: - hci_dev_unlock_bh(hdev); + hci_dev_unlock(hdev); hci_dev_put(hdev); return err; } @@ -482,7 +482,7 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le goto done; } - write_lock_bh(&sco_sk_list.lock); + write_lock(&sco_sk_list.lock); if (bacmp(src, BDADDR_ANY) && __sco_get_sock_by_addr(src)) { err = -EADDRINUSE; @@ -492,7 +492,7 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le sk->sk_state = BT_BOUND; } - write_unlock_bh(&sco_sk_list.lock); + write_unlock(&sco_sk_list.lock); done: release_sock(sk); @@ -893,15 +893,12 @@ done: } /* ----- SCO interface with lower layer (HCI) ----- */ -static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type) +int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) { register struct sock *sk; struct hlist_node *node; int lm = 0; - if (type != SCO_LINK && type != ESCO_LINK) - return -EINVAL; - BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); /* Find listening sockets */ @@ -921,13 +918,9 @@ static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type) return lm; } -static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) +int sco_connect_cfm(struct hci_conn *hcon, __u8 status) { BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); - - if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) - return -EINVAL; - if (!status) { struct sco_conn *conn; @@ -940,19 +933,15 @@ static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) return 0; } -static int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) +int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); - if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) - return -EINVAL; - sco_conn_del(hcon, bt_to_errno(reason)); - return 0; } -static int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) +int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) { struct sco_conn *conn = hcon->sco_data; @@ -976,14 +965,14 @@ static int sco_debugfs_show(struct seq_file *f, void *p) struct sock *sk; struct hlist_node *node; - read_lock_bh(&sco_sk_list.lock); + read_lock(&sco_sk_list.lock); sk_for_each(sk, node, &sco_sk_list.head) { seq_printf(f, "%s %s %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state); } - read_unlock_bh(&sco_sk_list.lock); + read_unlock(&sco_sk_list.lock); return 0; } @@ -1028,15 +1017,6 @@ static const struct net_proto_family sco_sock_family_ops = { .create = sco_sock_create, }; -static struct hci_proto sco_hci_proto = { - .name = "SCO", - .id = HCI_PROTO_SCO, - .connect_ind = sco_connect_ind, - .connect_cfm = sco_connect_cfm, - .disconn_cfm = sco_disconn_cfm, - .recv_scodata = sco_recv_scodata -}; - int __init sco_init(void) { int err; @@ -1051,13 +1031,6 @@ int __init sco_init(void) goto error; } - err = hci_register_proto(&sco_hci_proto); - if (err < 0) { - BT_ERR("SCO protocol registration failed"); - bt_sock_unregister(BTPROTO_SCO); - goto error; - } - if (bt_debugfs) { sco_debugfs = debugfs_create_file("sco", 0444, bt_debugfs, NULL, &sco_debugfs_fops); @@ -1081,9 +1054,6 @@ void __exit sco_exit(void) if (bt_sock_unregister(BTPROTO_SCO) < 0) BT_ERR("SCO socket unregistration failed"); - if (hci_unregister_proto(&sco_hci_proto) < 0) - BT_ERR("SCO protocol unregistration failed"); - proto_unregister(&sco_proto); } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 391888b88a92..32c47de30344 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -23,6 +23,7 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> +#include <net/bluetooth/mgmt.h> #include <net/bluetooth/smp.h> #include <linux/crypto.h> #include <linux/scatterlist.h> @@ -181,16 +182,29 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data) if (!skb) return; - hci_send_acl(conn->hcon, skb, 0); + skb->priority = HCI_PRIO_MAX; + hci_send_acl(conn->hchan, skb, 0); + + cancel_delayed_work_sync(&conn->security_timer); + schedule_delayed_work(&conn->security_timer, + msecs_to_jiffies(SMP_TIMEOUT)); } -static __u8 seclevel_to_authreq(__u8 level) +static __u8 authreq_to_seclevel(__u8 authreq) { - switch (level) { - case BT_SECURITY_HIGH: - /* Right now we don't support bonding */ - return SMP_AUTH_MITM; + if (authreq & SMP_AUTH_MITM) + return BT_SECURITY_HIGH; + else + return BT_SECURITY_MEDIUM; +} +static __u8 seclevel_to_authreq(__u8 sec_level) +{ + switch (sec_level) { + case BT_SECURITY_HIGH: + return SMP_AUTH_MITM | SMP_AUTH_BONDING; + case BT_SECURITY_MEDIUM: + return SMP_AUTH_BONDING; default: return SMP_AUTH_NONE; } @@ -201,19 +215,20 @@ static void build_pairing_cmd(struct l2cap_conn *conn, struct smp_cmd_pairing *rsp, __u8 authreq) { - u8 dist_keys; + u8 dist_keys = 0; - dist_keys = 0; if (test_bit(HCI_PAIRABLE, &conn->hcon->hdev->flags)) { - dist_keys = SMP_DIST_ENC_KEY | SMP_DIST_ID_KEY | SMP_DIST_SIGN; + dist_keys = SMP_DIST_ENC_KEY; authreq |= SMP_AUTH_BONDING; + } else { + authreq &= ~SMP_AUTH_BONDING; } if (rsp == NULL) { req->io_capability = conn->hcon->io_capability; req->oob_flag = SMP_OOB_NOT_PRESENT; req->max_key_size = SMP_MAX_ENC_KEY_SIZE; - req->init_key_dist = dist_keys; + req->init_key_dist = 0; req->resp_key_dist = dist_keys; req->auth_req = authreq; return; @@ -222,18 +237,323 @@ static void build_pairing_cmd(struct l2cap_conn *conn, rsp->io_capability = conn->hcon->io_capability; rsp->oob_flag = SMP_OOB_NOT_PRESENT; rsp->max_key_size = SMP_MAX_ENC_KEY_SIZE; - rsp->init_key_dist = req->init_key_dist & dist_keys; + rsp->init_key_dist = 0; rsp->resp_key_dist = req->resp_key_dist & dist_keys; rsp->auth_req = authreq; } static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size) { + struct smp_chan *smp = conn->smp_chan; + if ((max_key_size > SMP_MAX_ENC_KEY_SIZE) || (max_key_size < SMP_MIN_ENC_KEY_SIZE)) return SMP_ENC_KEY_SIZE; - conn->smp_key_size = max_key_size; + smp->smp_key_size = max_key_size; + + return 0; +} + +static void smp_failure(struct l2cap_conn *conn, u8 reason, u8 send) +{ + if (send) + smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason), + &reason); + + clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->hcon->pend); + mgmt_auth_failed(conn->hcon->hdev, conn->dst, reason); + cancel_delayed_work_sync(&conn->security_timer); + smp_chan_destroy(conn); +} + +#define JUST_WORKS 0x00 +#define JUST_CFM 0x01 +#define REQ_PASSKEY 0x02 +#define CFM_PASSKEY 0x03 +#define REQ_OOB 0x04 +#define OVERLAP 0xFF + +static const u8 gen_method[5][5] = { + { JUST_WORKS, JUST_CFM, REQ_PASSKEY, JUST_WORKS, REQ_PASSKEY }, + { JUST_WORKS, JUST_CFM, REQ_PASSKEY, JUST_WORKS, REQ_PASSKEY }, + { CFM_PASSKEY, CFM_PASSKEY, REQ_PASSKEY, JUST_WORKS, CFM_PASSKEY }, + { JUST_WORKS, JUST_CFM, JUST_WORKS, JUST_WORKS, JUST_CFM }, + { CFM_PASSKEY, CFM_PASSKEY, REQ_PASSKEY, JUST_WORKS, OVERLAP }, +}; + +static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, + u8 local_io, u8 remote_io) +{ + struct hci_conn *hcon = conn->hcon; + struct smp_chan *smp = conn->smp_chan; + u8 method; + u32 passkey = 0; + int ret = 0; + + /* Initialize key for JUST WORKS */ + memset(smp->tk, 0, sizeof(smp->tk)); + clear_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + + BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io); + + /* If neither side wants MITM, use JUST WORKS */ + /* If either side has unknown io_caps, use JUST WORKS */ + /* Otherwise, look up method from the table */ + if (!(auth & SMP_AUTH_MITM) || + local_io > SMP_IO_KEYBOARD_DISPLAY || + remote_io > SMP_IO_KEYBOARD_DISPLAY) + method = JUST_WORKS; + else + method = gen_method[local_io][remote_io]; + + /* If not bonding, don't ask user to confirm a Zero TK */ + if (!(auth & SMP_AUTH_BONDING) && method == JUST_CFM) + method = JUST_WORKS; + + /* If Just Works, Continue with Zero TK */ + if (method == JUST_WORKS) { + set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + return 0; + } + + /* Not Just Works/Confirm results in MITM Authentication */ + if (method != JUST_CFM) + set_bit(SMP_FLAG_MITM_AUTH, &smp->smp_flags); + + /* If both devices have Keyoard-Display I/O, the master + * Confirms and the slave Enters the passkey. + */ + if (method == OVERLAP) { + if (hcon->link_mode & HCI_LM_MASTER) + method = CFM_PASSKEY; + else + method = REQ_PASSKEY; + } + + /* Generate random passkey. Not valid until confirmed. */ + if (method == CFM_PASSKEY) { + u8 key[16]; + + memset(key, 0, sizeof(key)); + get_random_bytes(&passkey, sizeof(passkey)); + passkey %= 1000000; + put_unaligned_le32(passkey, key); + swap128(key, smp->tk); + BT_DBG("PassKey: %d", passkey); + } + + hci_dev_lock(hcon->hdev); + + if (method == REQ_PASSKEY) + ret = mgmt_user_passkey_request(hcon->hdev, conn->dst); + else + ret = mgmt_user_confirm_request(hcon->hdev, conn->dst, + cpu_to_le32(passkey), 0); + + hci_dev_unlock(hcon->hdev); + + return ret; +} + +static void confirm_work(struct work_struct *work) +{ + struct smp_chan *smp = container_of(work, struct smp_chan, confirm); + struct l2cap_conn *conn = smp->conn; + struct crypto_blkcipher *tfm; + struct smp_cmd_pairing_confirm cp; + int ret; + u8 res[16], reason; + + BT_DBG("conn %p", conn); + + tfm = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + reason = SMP_UNSPECIFIED; + goto error; + } + + smp->tfm = tfm; + + if (conn->hcon->out) + ret = smp_c1(tfm, smp->tk, smp->prnd, smp->preq, smp->prsp, 0, + conn->src, conn->hcon->dst_type, conn->dst, + res); + else + ret = smp_c1(tfm, smp->tk, smp->prnd, smp->preq, smp->prsp, + conn->hcon->dst_type, conn->dst, 0, conn->src, + res); + if (ret) { + reason = SMP_UNSPECIFIED; + goto error; + } + + clear_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags); + + swap128(res, cp.confirm_val); + smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); + + return; + +error: + smp_failure(conn, reason, 1); +} + +static void random_work(struct work_struct *work) +{ + struct smp_chan *smp = container_of(work, struct smp_chan, random); + struct l2cap_conn *conn = smp->conn; + struct hci_conn *hcon = conn->hcon; + struct crypto_blkcipher *tfm = smp->tfm; + u8 reason, confirm[16], res[16], key[16]; + int ret; + + if (IS_ERR_OR_NULL(tfm)) { + reason = SMP_UNSPECIFIED; + goto error; + } + + BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave"); + + if (hcon->out) + ret = smp_c1(tfm, smp->tk, smp->rrnd, smp->preq, smp->prsp, 0, + conn->src, hcon->dst_type, conn->dst, + res); + else + ret = smp_c1(tfm, smp->tk, smp->rrnd, smp->preq, smp->prsp, + hcon->dst_type, conn->dst, 0, conn->src, + res); + if (ret) { + reason = SMP_UNSPECIFIED; + goto error; + } + + swap128(res, confirm); + + if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) { + BT_ERR("Pairing failed (confirmation values mismatch)"); + reason = SMP_CONFIRM_FAILED; + goto error; + } + + if (hcon->out) { + u8 stk[16], rand[8]; + __le16 ediv; + + memset(rand, 0, sizeof(rand)); + ediv = 0; + + smp_s1(tfm, smp->tk, smp->rrnd, smp->prnd, key); + swap128(key, stk); + + memset(stk + smp->smp_key_size, 0, + SMP_MAX_ENC_KEY_SIZE - smp->smp_key_size); + + if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->pend)) { + reason = SMP_UNSPECIFIED; + goto error; + } + + hci_le_start_enc(hcon, ediv, rand, stk); + hcon->enc_key_size = smp->smp_key_size; + } else { + u8 stk[16], r[16], rand[8]; + __le16 ediv; + + memset(rand, 0, sizeof(rand)); + ediv = 0; + + swap128(smp->prnd, r); + smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(r), r); + + smp_s1(tfm, smp->tk, smp->prnd, smp->rrnd, key); + swap128(key, stk); + + memset(stk + smp->smp_key_size, 0, + SMP_MAX_ENC_KEY_SIZE - smp->smp_key_size); + + hci_add_ltk(hcon->hdev, 0, conn->dst, smp->smp_key_size, + ediv, rand, stk); + } + + return; + +error: + smp_failure(conn, reason, 1); +} + +static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) +{ + struct smp_chan *smp; + + smp = kzalloc(sizeof(struct smp_chan), GFP_ATOMIC); + if (!smp) + return NULL; + + INIT_WORK(&smp->confirm, confirm_work); + INIT_WORK(&smp->random, random_work); + + smp->conn = conn; + conn->smp_chan = smp; + conn->hcon->smp_conn = conn; + + hci_conn_hold(conn->hcon); + + return smp; +} + +void smp_chan_destroy(struct l2cap_conn *conn) +{ + struct smp_chan *smp = conn->smp_chan; + + clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->pend); + + if (smp->tfm) + crypto_free_blkcipher(smp->tfm); + + kfree(smp); + conn->smp_chan = NULL; + conn->hcon->smp_conn = NULL; + hci_conn_put(conn->hcon); +} + +int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) +{ + struct l2cap_conn *conn = hcon->smp_conn; + struct smp_chan *smp; + u32 value; + u8 key[16]; + + BT_DBG(""); + + if (!conn) + return -ENOTCONN; + + smp = conn->smp_chan; + + switch (mgmt_op) { + case MGMT_OP_USER_PASSKEY_REPLY: + value = le32_to_cpu(passkey); + memset(key, 0, sizeof(key)); + BT_DBG("PassKey: %d", value); + put_unaligned_le32(value, key); + swap128(key, smp->tk); + /* Fall Through */ + case MGMT_OP_USER_CONFIRM_REPLY: + set_bit(SMP_FLAG_TK_VALID, &smp->smp_flags); + break; + case MGMT_OP_USER_PASSKEY_NEG_REPLY: + case MGMT_OP_USER_CONFIRM_NEG_REPLY: + smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED, 1); + return 0; + default: + smp_failure(conn, SMP_PASSKEY_ENTRY_FAILED, 1); + return -EOPNOTSUPP; + } + + /* If it is our turn to send Pairing Confirm, do so now */ + if (test_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags)) + queue_work(hcon->hdev->workqueue, &smp->confirm); return 0; } @@ -241,34 +561,48 @@ static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size) static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_pairing rsp, *req = (void *) skb->data; + struct smp_chan *smp; u8 key_size; + u8 auth = SMP_AUTH_NONE; + int ret; BT_DBG("conn %p", conn); - conn->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&conn->preq[1], req, sizeof(*req)); + if (conn->hcon->link_mode & HCI_LM_MASTER) + return SMP_CMD_NOTSUPP; + + if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->pend)) + smp = smp_chan_create(conn); + + smp = conn->smp_chan; + + smp->preq[0] = SMP_CMD_PAIRING_REQ; + memcpy(&smp->preq[1], req, sizeof(*req)); skb_pull(skb, sizeof(*req)); - if (req->oob_flag) - return SMP_OOB_NOT_AVAIL; + /* We didn't start the pairing, so match remote */ + if (req->auth_req & SMP_AUTH_BONDING) + auth = req->auth_req; - /* We didn't start the pairing, so no requirements */ - build_pairing_cmd(conn, req, &rsp, SMP_AUTH_NONE); + build_pairing_cmd(conn, req, &rsp, auth); key_size = min(req->max_key_size, rsp.max_key_size); if (check_enc_key_size(conn, key_size)) return SMP_ENC_KEY_SIZE; - /* Just works */ - memset(conn->tk, 0, sizeof(conn->tk)); + ret = smp_rand(smp->prnd); + if (ret) + return SMP_UNSPECIFIED; - conn->prsp[0] = SMP_CMD_PAIRING_RSP; - memcpy(&conn->prsp[1], &rsp, sizeof(rsp)); + smp->prsp[0] = SMP_CMD_PAIRING_RSP; + memcpy(&smp->prsp[1], &rsp, sizeof(rsp)); smp_send_cmd(conn, SMP_CMD_PAIRING_RSP, sizeof(rsp), &rsp); - mod_timer(&conn->security_timer, jiffies + - msecs_to_jiffies(SMP_TIMEOUT)); + /* Request setup of TK */ + ret = tk_request(conn, 0, auth, rsp.io_capability, req->io_capability); + if (ret) + return SMP_UNSPECIFIED; return 0; } @@ -276,187 +610,151 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_pairing *req, *rsp = (void *) skb->data; - struct smp_cmd_pairing_confirm cp; - struct crypto_blkcipher *tfm = conn->hcon->hdev->tfm; + struct smp_chan *smp = conn->smp_chan; + struct hci_dev *hdev = conn->hcon->hdev; + u8 key_size, auth = SMP_AUTH_NONE; int ret; - u8 res[16], key_size; BT_DBG("conn %p", conn); + if (!(conn->hcon->link_mode & HCI_LM_MASTER)) + return SMP_CMD_NOTSUPP; + skb_pull(skb, sizeof(*rsp)); - req = (void *) &conn->preq[1]; + req = (void *) &smp->preq[1]; key_size = min(req->max_key_size, rsp->max_key_size); if (check_enc_key_size(conn, key_size)) return SMP_ENC_KEY_SIZE; - if (rsp->oob_flag) - return SMP_OOB_NOT_AVAIL; + ret = smp_rand(smp->prnd); + if (ret) + return SMP_UNSPECIFIED; - /* Just works */ - memset(conn->tk, 0, sizeof(conn->tk)); + smp->prsp[0] = SMP_CMD_PAIRING_RSP; + memcpy(&smp->prsp[1], rsp, sizeof(*rsp)); - conn->prsp[0] = SMP_CMD_PAIRING_RSP; - memcpy(&conn->prsp[1], rsp, sizeof(*rsp)); + if ((req->auth_req & SMP_AUTH_BONDING) && + (rsp->auth_req & SMP_AUTH_BONDING)) + auth = SMP_AUTH_BONDING; - ret = smp_rand(conn->prnd); - if (ret) - return SMP_UNSPECIFIED; + auth |= (req->auth_req | rsp->auth_req) & SMP_AUTH_MITM; - ret = smp_c1(tfm, conn->tk, conn->prnd, conn->preq, conn->prsp, 0, - conn->src, conn->hcon->dst_type, conn->dst, res); + ret = tk_request(conn, 0, auth, rsp->io_capability, req->io_capability); if (ret) return SMP_UNSPECIFIED; - swap128(res, cp.confirm_val); + set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags); + + /* Can't compose response until we have been confirmed */ + if (!test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags)) + return 0; - smp_send_cmd(conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); + queue_work(hdev->workqueue, &smp->confirm); return 0; } static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) { - struct crypto_blkcipher *tfm = conn->hcon->hdev->tfm; + struct smp_chan *smp = conn->smp_chan; + struct hci_dev *hdev = conn->hcon->hdev; BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave"); - memcpy(conn->pcnf, skb->data, sizeof(conn->pcnf)); - skb_pull(skb, sizeof(conn->pcnf)); + memcpy(smp->pcnf, skb->data, sizeof(smp->pcnf)); + skb_pull(skb, sizeof(smp->pcnf)); if (conn->hcon->out) { u8 random[16]; - swap128(conn->prnd, random); + swap128(smp->prnd, random); smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(random), random); + } else if (test_bit(SMP_FLAG_TK_VALID, &smp->smp_flags)) { + queue_work(hdev->workqueue, &smp->confirm); } else { - struct smp_cmd_pairing_confirm cp; - int ret; - u8 res[16]; - - ret = smp_rand(conn->prnd); - if (ret) - return SMP_UNSPECIFIED; - - ret = smp_c1(tfm, conn->tk, conn->prnd, conn->preq, conn->prsp, - conn->hcon->dst_type, conn->dst, - 0, conn->src, res); - if (ret) - return SMP_CONFIRM_FAILED; - - swap128(res, cp.confirm_val); - - smp_send_cmd(conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); + set_bit(SMP_FLAG_CFM_PENDING, &smp->smp_flags); } - mod_timer(&conn->security_timer, jiffies + - msecs_to_jiffies(SMP_TIMEOUT)); - return 0; } static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) { - struct hci_conn *hcon = conn->hcon; - struct crypto_blkcipher *tfm = hcon->hdev->tfm; - int ret; - u8 key[16], res[16], random[16], confirm[16]; - - swap128(skb->data, random); - skb_pull(skb, sizeof(random)); - - if (conn->hcon->out) - ret = smp_c1(tfm, conn->tk, random, conn->preq, conn->prsp, 0, - conn->src, conn->hcon->dst_type, conn->dst, - res); - else - ret = smp_c1(tfm, conn->tk, random, conn->preq, conn->prsp, - conn->hcon->dst_type, conn->dst, 0, conn->src, - res); - if (ret) - return SMP_UNSPECIFIED; - - BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave"); - - swap128(res, confirm); - - if (memcmp(conn->pcnf, confirm, sizeof(conn->pcnf)) != 0) { - BT_ERR("Pairing failed (confirmation values mismatch)"); - return SMP_CONFIRM_FAILED; - } - - if (conn->hcon->out) { - u8 stk[16], rand[8]; - __le16 ediv; + struct smp_chan *smp = conn->smp_chan; + struct hci_dev *hdev = conn->hcon->hdev; - memset(rand, 0, sizeof(rand)); - ediv = 0; + BT_DBG("conn %p", conn); - smp_s1(tfm, conn->tk, random, conn->prnd, key); - swap128(key, stk); + swap128(skb->data, smp->rrnd); + skb_pull(skb, sizeof(smp->rrnd)); - memset(stk + conn->smp_key_size, 0, - SMP_MAX_ENC_KEY_SIZE - conn->smp_key_size); + queue_work(hdev->workqueue, &smp->random); - hci_le_start_enc(hcon, ediv, rand, stk); - hcon->enc_key_size = conn->smp_key_size; - } else { - u8 stk[16], r[16], rand[8]; - __le16 ediv; + return 0; +} - memset(rand, 0, sizeof(rand)); - ediv = 0; +static u8 smp_ltk_encrypt(struct l2cap_conn *conn) +{ + struct link_key *key; + struct key_master_id *master; + struct hci_conn *hcon = conn->hcon; - swap128(conn->prnd, r); - smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(r), r); + key = hci_find_link_key_type(hcon->hdev, conn->dst, + HCI_LK_SMP_LTK); + if (!key) + return 0; - smp_s1(tfm, conn->tk, conn->prnd, random, key); - swap128(key, stk); + if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, + &hcon->pend)) + return 1; - memset(stk + conn->smp_key_size, 0, - SMP_MAX_ENC_KEY_SIZE - conn->smp_key_size); + master = (void *) key->data; + hci_le_start_enc(hcon, master->ediv, master->rand, + key->val); + hcon->enc_key_size = key->pin_len; - hci_add_ltk(conn->hcon->hdev, 0, conn->dst, conn->smp_key_size, - ediv, rand, stk); - } + return 1; - return 0; } - static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_security_req *rp = (void *) skb->data; struct smp_cmd_pairing cp; struct hci_conn *hcon = conn->hcon; + struct smp_chan *smp; BT_DBG("conn %p", conn); - if (test_bit(HCI_CONN_ENCRYPT_PEND, &hcon->pend)) + hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req); + + if (smp_ltk_encrypt(conn)) return 0; + if (test_and_set_bit(HCI_CONN_LE_SMP_PEND, &hcon->pend)) + return 0; + + smp = smp_chan_create(conn); + skb_pull(skb, sizeof(*rp)); memset(&cp, 0, sizeof(cp)); build_pairing_cmd(conn, &cp, NULL, rp->auth_req); - conn->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&conn->preq[1], &cp, sizeof(cp)); + smp->preq[0] = SMP_CMD_PAIRING_REQ; + memcpy(&smp->preq[1], &cp, sizeof(cp)); smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - mod_timer(&conn->security_timer, jiffies + - msecs_to_jiffies(SMP_TIMEOUT)); - - set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->pend); - return 0; } int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level) { struct hci_conn *hcon = conn->hcon; + struct smp_chan *smp = conn->smp_chan; __u8 authreq; BT_DBG("conn %p hcon %p level 0x%2.2x", conn, hcon, sec_level); @@ -464,42 +762,31 @@ int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level) if (!lmp_host_le_capable(hcon->hdev)) return 1; - if (IS_ERR(hcon->hdev->tfm)) - return 1; - - if (test_bit(HCI_CONN_ENCRYPT_PEND, &hcon->pend)) - return 0; - if (sec_level == BT_SECURITY_LOW) return 1; if (hcon->sec_level >= sec_level) return 1; - authreq = seclevel_to_authreq(sec_level); + if (hcon->link_mode & HCI_LM_MASTER) + if (smp_ltk_encrypt(conn)) + goto done; - if (hcon->link_mode & HCI_LM_MASTER) { - struct smp_cmd_pairing cp; - struct link_key *key; + if (test_and_set_bit(HCI_CONN_LE_SMP_PEND, &hcon->pend)) + return 0; - key = hci_find_link_key_type(hcon->hdev, conn->dst, - HCI_LK_SMP_LTK); - if (key) { - struct key_master_id *master = (void *) key->data; + smp = smp_chan_create(conn); + if (!smp) + return 1; - hci_le_start_enc(hcon, master->ediv, master->rand, - key->val); - hcon->enc_key_size = key->pin_len; + authreq = seclevel_to_authreq(sec_level); - goto done; - } + if (hcon->link_mode & HCI_LM_MASTER) { + struct smp_cmd_pairing cp; build_pairing_cmd(conn, &cp, NULL, authreq); - conn->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&conn->preq[1], &cp, sizeof(cp)); - - mod_timer(&conn->security_timer, jiffies + - msecs_to_jiffies(SMP_TIMEOUT)); + smp->preq[0] = SMP_CMD_PAIRING_REQ; + memcpy(&smp->preq[1], &cp, sizeof(cp)); smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); } else { @@ -510,7 +797,6 @@ int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level) done: hcon->pending_sec_level = sec_level; - set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->pend); return 0; } @@ -518,10 +804,11 @@ done: static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_encrypt_info *rp = (void *) skb->data; + struct smp_chan *smp = conn->smp_chan; skb_pull(skb, sizeof(*rp)); - memcpy(conn->tk, rp->ltk, sizeof(conn->tk)); + memcpy(smp->tk, rp->ltk, sizeof(smp->tk)); return 0; } @@ -529,11 +816,12 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_master_ident *rp = (void *) skb->data; + struct smp_chan *smp = conn->smp_chan; skb_pull(skb, sizeof(*rp)); - hci_add_ltk(conn->hcon->hdev, 1, conn->src, conn->smp_key_size, - rp->ediv, rp->rand, conn->tk); + hci_add_ltk(conn->hcon->hdev, 1, conn->dst, smp->smp_key_size, + rp->ediv, rp->rand, smp->tk); smp_distribute_keys(conn, 1); @@ -552,12 +840,6 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) goto done; } - if (IS_ERR(conn->hcon->hdev->tfm)) { - err = PTR_ERR(conn->hcon->hdev->tfm); - reason = SMP_PAIRING_NOTSUPP; - goto done; - } - skb_pull(skb, sizeof(code)); switch (code) { @@ -566,6 +848,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) break; case SMP_CMD_PAIRING_FAIL: + smp_failure(conn, skb->data[0], 0); reason = 0; err = -EPERM; break; @@ -611,8 +894,7 @@ int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) done: if (reason) - smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason), - &reason); + smp_failure(conn, reason, 1); kfree_skb(skb); return err; @@ -621,20 +903,21 @@ done: int smp_distribute_keys(struct l2cap_conn *conn, __u8 force) { struct smp_cmd_pairing *req, *rsp; + struct smp_chan *smp = conn->smp_chan; __u8 *keydist; BT_DBG("conn %p force %d", conn, force); - if (IS_ERR(conn->hcon->hdev->tfm)) - return PTR_ERR(conn->hcon->hdev->tfm); + if (!test_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->pend)) + return 0; - rsp = (void *) &conn->prsp[1]; + rsp = (void *) &smp->prsp[1]; /* The responder sends its keys first */ if (!force && conn->hcon->out && (rsp->resp_key_dist & 0x07)) return 0; - req = (void *) &conn->preq[1]; + req = (void *) &smp->preq[1]; if (conn->hcon->out) { keydist = &rsp->init_key_dist; @@ -658,7 +941,7 @@ int smp_distribute_keys(struct l2cap_conn *conn, __u8 force) smp_send_cmd(conn, SMP_CMD_ENCRYPT_INFO, sizeof(enc), &enc); - hci_add_ltk(conn->hcon->hdev, 1, conn->dst, conn->smp_key_size, + hci_add_ltk(conn->hcon->hdev, 1, conn->dst, smp->smp_key_size, ediv, ident.rand, enc.ltk); ident.ediv = cpu_to_le16(ediv); @@ -698,5 +981,11 @@ int smp_distribute_keys(struct l2cap_conn *conn, __u8 force) *keydist &= ~SMP_DIST_SIGN; } + if (conn->hcon->out || force) { + clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->pend); + cancel_delayed_work_sync(&conn->security_timer); + smp_chan_destroy(conn); + } + return 0; } diff --git a/net/bridge/br.c b/net/bridge/br.c index f20c4fd915a8..ba780cc8e515 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -62,7 +62,7 @@ static int __init br_init(void) brioctl_set(br_ioctl_deviceless_stub); -#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) +#if IS_ENABLED(CONFIG_ATM_LANE) br_fdb_test_addr_hook = br_fdb_test_addr; #endif @@ -93,7 +93,7 @@ static void __exit br_deinit(void) rcu_barrier(); /* Wait for completion of call_rcu()'s */ br_netfilter_fini(); -#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) +#if IS_ENABLED(CONFIG_ATM_LANE) br_fdb_test_addr_hook = NULL; #endif diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ff3ed6086ce1..71773b014e0c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -170,8 +170,11 @@ static int br_set_mac_address(struct net_device *dev, void *p) return -EINVAL; spin_lock_bh(&br->lock); - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); - br_stp_change_bridge_id(br, addr->sa_data); + if (compare_ether_addr(dev->dev_addr, addr->sa_data)) { + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + br_fdb_change_mac_address(br, addr->sa_data); + br_stp_change_bridge_id(br, addr->sa_data); + } br->flags |= BR_SET_MAC_ADDR; spin_unlock_bh(&br->lock); @@ -186,7 +189,8 @@ static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) strcpy(info->bus_info, "N/A"); } -static u32 br_fix_features(struct net_device *dev, u32 features) +static netdev_features_t br_fix_features(struct net_device *dev, + netdev_features_t features) { struct net_bridge *br = netdev_priv(dev); @@ -301,7 +305,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_start_xmit = br_dev_xmit, .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, - .ndo_set_multicast_list = br_dev_set_multicast_list, + .ndo_set_rx_mode = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -341,10 +345,10 @@ void br_dev_setup(struct net_device *dev) dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX; br->dev = dev; @@ -358,6 +362,8 @@ void br_dev_setup(struct net_device *dev) memcpy(br->group_addr, br_group_address, ETH_ALEN); br->stp_enabled = BR_NO_STP; + br->group_fwd_mask = BR_GROUPFWD_DEFAULT; + br->designated_root = br->bridge_id; br->bridge_max_age = br->max_age = 20 * HZ; br->bridge_hello_time = br->hello_time = 2 * HZ; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 68def3b7fb49..f963f6b1884f 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -28,7 +28,8 @@ static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr); -static void fdb_notify(const struct net_bridge_fdb_entry *, int); +static void fdb_notify(struct net_bridge *br, + const struct net_bridge_fdb_entry *, int); static u32 fdb_salt __read_mostly; @@ -80,10 +81,10 @@ static void fdb_rcu_free(struct rcu_head *head) kmem_cache_free(br_fdb_cache, ent); } -static inline void fdb_delete(struct net_bridge_fdb_entry *f) +static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) { - fdb_notify(f, RTM_DELNEIGH); hlist_del_rcu(&f->hlist); + fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); } @@ -114,7 +115,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) } /* delete old one */ - fdb_delete(f); + fdb_delete(br, f); goto insert; } } @@ -126,6 +127,18 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) spin_unlock_bh(&br->hash_lock); } +void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) +{ + struct net_bridge_fdb_entry *f; + + /* If old entry was unassociated with any port, then delete it. */ + f = __br_fdb_get(br, br->dev->dev_addr); + if (f && f->is_local && !f->dst) + fdb_delete(br, f); + + fdb_insert(br, NULL, newaddr); +} + void br_fdb_cleanup(unsigned long _data) { struct net_bridge *br = (struct net_bridge *)_data; @@ -144,7 +157,7 @@ void br_fdb_cleanup(unsigned long _data) continue; this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) - fdb_delete(f); + fdb_delete(br, f); else if (time_before(this_timer, next_timer)) next_timer = this_timer; } @@ -165,7 +178,7 @@ void br_fdb_flush(struct net_bridge *br) struct hlist_node *h, *n; hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { if (!f->is_static) - fdb_delete(f); + fdb_delete(br, f); } } spin_unlock_bh(&br->hash_lock); @@ -209,7 +222,7 @@ void br_fdb_delete_by_port(struct net_bridge *br, } } - fdb_delete(f); + fdb_delete(br, f); skip_delete: ; } } @@ -234,7 +247,7 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, return NULL; } -#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) +#if IS_ENABLED(CONFIG_ATM_LANE) /* Interface used by ATM LANE hook to test * if an addr is on some other bridge port */ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) @@ -249,7 +262,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) ret = 0; else { fdb = __br_fdb_get(port->br, addr); - ret = fdb && fdb->dst->dev != dev && + ret = fdb && fdb->dst && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; } rcu_read_unlock(); @@ -281,6 +294,10 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, if (has_expired(br, f)) continue; + /* ignore pseudo entry for local MAC address */ + if (!f->dst) + continue; + if (skip) { --skip; continue; @@ -347,7 +364,6 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb->is_static = 0; fdb->updated = fdb->used = jiffies; hlist_add_head_rcu(&fdb->hlist, head); - fdb_notify(fdb, RTM_NEWNEIGH); } return fdb; } @@ -371,7 +387,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, br_warn(br, "adding interface %s with same address " "as a received packet\n", source->dev->name); - fdb_delete(fdb); + fdb_delete(br, fdb); } fdb = fdb_create(head, source, addr); @@ -379,6 +395,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return -ENOMEM; fdb->is_local = fdb->is_static = 1; + fdb_notify(br, fdb, RTM_NEWNEIGH); return 0; } @@ -424,9 +441,11 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } else { spin_lock(&br->hash_lock); - if (likely(!fdb_find(head, addr))) - fdb_create(head, source, addr); - + if (likely(!fdb_find(head, addr))) { + fdb = fdb_create(head, source, addr); + if (fdb) + fdb_notify(br, fdb, RTM_NEWNEIGH); + } /* else we lose race and someone else inserts * it first, don't bother updating */ @@ -446,7 +465,7 @@ static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) return NUD_REACHABLE; } -static int fdb_fill_info(struct sk_buff *skb, +static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, u32 pid, u32 seq, int type, unsigned int flags) { @@ -459,14 +478,13 @@ static int fdb_fill_info(struct sk_buff *skb, if (nlh == NULL) return -EMSGSIZE; - ndm = nlmsg_data(nlh); ndm->ndm_family = AF_BRIDGE; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = 0; ndm->ndm_type = 0; - ndm->ndm_ifindex = fdb->dst->dev->ifindex; + ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex; ndm->ndm_state = fdb_to_nud(fdb); NLA_PUT(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr); @@ -491,9 +509,10 @@ static inline size_t fdb_nlmsg_size(void) + nla_total_size(sizeof(struct nda_cacheinfo)); } -static void fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) +static void fdb_notify(struct net_bridge *br, + const struct net_bridge_fdb_entry *fdb, int type) { - struct net *net = dev_net(fdb->dst->dev); + struct net *net = dev_net(br->dev); struct sk_buff *skb; int err = -ENOBUFS; @@ -501,7 +520,7 @@ static void fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) if (skb == NULL) goto errout; - err = fdb_fill_info(skb, fdb, 0, 0, type, 0); + err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0); if (err < 0) { /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -538,7 +557,7 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < cb->args[0]) goto skip; - if (fdb_fill_info(skb, f, + if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, @@ -556,26 +575,41 @@ skip: return skb->len; } -/* Create new static fdb entry */ +/* Update (create or replace) forwarding database entry */ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, - __u16 state) + __u16 state, __u16 flags) { struct net_bridge *br = source->br; struct hlist_head *head = &br->hash[br_mac_hash(addr)]; struct net_bridge_fdb_entry *fdb; fdb = fdb_find(head, addr); - if (fdb) - return -EEXIST; + if (fdb == NULL) { + if (!(flags & NLM_F_CREATE)) + return -ENOENT; + + fdb = fdb_create(head, source, addr); + if (!fdb) + return -ENOMEM; + fdb_notify(br, fdb, RTM_NEWNEIGH); + } else { + if (flags & NLM_F_EXCL) + return -EEXIST; + } - fdb = fdb_create(head, source, addr); - if (!fdb) - return -ENOMEM; + if (fdb_to_nud(fdb) != state) { + if (state & NUD_PERMANENT) + fdb->is_local = fdb->is_static = 1; + else if (state & NUD_NOARP) { + fdb->is_local = 0; + fdb->is_static = 1; + } else + fdb->is_local = fdb->is_static = 0; + + fdb->updated = fdb->used = jiffies; + fdb_notify(br, fdb, RTM_NEWNEIGH); + } - if (state & NUD_PERMANENT) - fdb->is_local = fdb->is_static = 1; - else if (state & NUD_NOARP) - fdb->is_static = 1; return 0; } @@ -618,6 +652,11 @@ int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -EINVAL; } + if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { + pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); + return -EINVAL; + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -625,9 +664,15 @@ int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -EINVAL; } - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state); - spin_unlock_bh(&p->br->hash_lock); + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, nlh->nlmsg_flags); + spin_unlock_bh(&p->br->hash_lock); + } return err; } @@ -642,7 +687,7 @@ static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) if (!fdb) return -ENOENT; - fdb_delete(fdb); + fdb_delete(p->br, fdb); return 0; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index ee64287f1290..61f65344e711 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -98,7 +98,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) /* called with rcu_read_lock */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { - if (should_deliver(to, skb)) { + if (to && should_deliver(to, skb)) { __br_deliver(to, skb); return; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 1d420f64ff27..0a942fbccc9a 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/netpoll.h> #include <linux/ethtool.h> #include <linux/if_arp.h> @@ -33,20 +34,18 @@ */ static int port_cost(struct net_device *dev) { - if (dev->ethtool_ops && dev->ethtool_ops->get_settings) { - struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, }; - - if (!dev_ethtool_get_settings(dev, &ecmd)) { - switch (ethtool_cmd_speed(&ecmd)) { - case SPEED_10000: - return 2; - case SPEED_1000: - return 4; - case SPEED_100: - return 19; - case SPEED_10: - return 100; - } + struct ethtool_cmd ecmd; + + if (!__ethtool_get_settings(dev, &ecmd)) { + switch (ethtool_cmd_speed(&ecmd)) { + case SPEED_10000: + return 2; + case SPEED_1000: + return 4; + case SPEED_100: + return 19; + case SPEED_10: + return 100; } } @@ -297,10 +296,11 @@ int br_min_mtu(const struct net_bridge *br) /* * Recomputes features using slave's features */ -u32 br_features_recompute(struct net_bridge *br, u32 features) +netdev_features_t br_features_recompute(struct net_bridge *br, + netdev_features_t features) { struct net_bridge_port *p; - u32 mask; + netdev_features_t mask; if (list_empty(&br->port_list)) return features; @@ -325,7 +325,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) /* Don't allow bridging non-ethernet like devices */ if ((dev->flags & IFF_LOOPBACK) || - dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN) + dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || + !is_valid_ether_addr(dev->dev_addr)) return -EINVAL; /* No bridging of bridges */ @@ -353,10 +354,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), SYSFS_BRIDGE_PORT_ATTR); if (err) - goto err0; - - err = br_fdb_insert(br, p, dev->dev_addr); - if (err) goto err1; err = br_sysfs_addif(p); @@ -397,6 +394,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev_set_mtu(br->dev, br_min_mtu(br)); + if (br_fdb_insert(br, p, dev->dev_addr)) + netdev_err(dev, "failed insert local address bridge forwarding table\n"); + kobject_uevent(&p->kobj, KOBJ_ADD); return 0; @@ -406,11 +406,9 @@ err4: err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: - br_fdb_delete_by_port(br, p, 1); -err1: kobject_put(&p->kobj); p = NULL; /* kobject_put frees */ -err0: +err1: dev_set_promiscuity(dev, -1); put_back: dev_put(dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f06ee39c73fd..5a31731be4d0 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -16,6 +16,7 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/netfilter_bridge.h> +#include <linux/export.h> #include "br_private.h" /* Bridge group multicast address 802.1d (pg 51). */ @@ -162,14 +163,37 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) p = br_port_get_rcu(skb->dev); if (unlikely(is_link_local(dest))) { - /* Pause frames shouldn't be passed up by driver anyway */ - if (skb->protocol == htons(ETH_P_PAUSE)) + /* + * See IEEE 802.1D Table 7-10 Reserved addresses + * + * Assignment Value + * Bridge Group Address 01-80-C2-00-00-00 + * (MAC Control) 802.3 01-80-C2-00-00-01 + * (Link Aggregation) 802.3 01-80-C2-00-00-02 + * 802.1X PAE address 01-80-C2-00-00-03 + * + * 802.1AB LLDP 01-80-C2-00-00-0E + * + * Others reserved for future standardization + */ + switch (dest[5]) { + case 0x00: /* Bridge Group Address */ + /* If STP is turned off, + then must forward to keep loop detection */ + if (p->br->stp_enabled == BR_NO_STP) + goto forward; + break; + + case 0x01: /* IEEE MAC (Pause) */ goto drop; - /* If STP is turned off, then forward */ - if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) - goto forward; + default: + /* Allow selective forwarding for most other protocols */ + if (p->br->group_fwd_mask & (1u << dest[5])) + goto forward; + } + /* Deliver packet to local host only */ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) { return RX_HANDLER_CONSUMED; /* consumed by filter */ diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 995cbe0ac0b2..568d5bf17534 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -24,7 +24,7 @@ #include <linux/slab.h> #include <linux/timer.h> #include <net/ip.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/mld.h> #include <net/addrconf.h> @@ -36,7 +36,7 @@ #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) { if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) @@ -52,7 +52,7 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) switch (a->proto) { case htons(ETH_P_IP): return a->u.ip4 == b->u.ip4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return ipv6_addr_equal(&a->u.ip6, &b->u.ip6); #endif @@ -65,7 +65,7 @@ static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, const struct in6_addr *ip) { @@ -79,7 +79,7 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, switch (ip->proto) { case htons(ETH_P_IP): return __br_ip4_hash(mdb, ip->u.ip4); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return __br_ip6_hash(mdb, &ip->u.ip6); #endif @@ -121,13 +121,13 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get( return br_mdb_ip_get(mdb, &br_dst); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static struct net_bridge_mdb_entry *br_mdb_ip6_get( struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst) { struct br_ip br_dst; - ipv6_addr_copy(&br_dst.u.ip6, dst); + br_dst.u.ip6 = *dst; br_dst.proto = htons(ETH_P_IPV6); return br_mdb_ip_get(mdb, &br_dst); @@ -152,9 +152,9 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, case htons(ETH_P_IP): ip.u.ip4 = ip_hdr(skb)->daddr; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - ipv6_addr_copy(&ip.u.ip6, &ipv6_hdr(skb)->daddr); + ip.u.ip6 = ipv6_hdr(skb)->daddr; break; #endif default: @@ -411,7 +411,7 @@ out: return skb; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, const struct in6_addr *group) { @@ -474,7 +474,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, mldq->mld_cksum = 0; mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); mldq->mld_reserved = 0; - ipv6_addr_copy(&mldq->mld_mca, group); + mldq->mld_mca = *group; /* checksum */ mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -496,7 +496,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, switch (addr->proto) { case htons(ETH_P_IP): return br_ip4_multicast_alloc_query(br, addr->u.ip4); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return br_ip6_multicast_alloc_query(br, &addr->u.ip6); #endif @@ -773,7 +773,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, return br_multicast_add_group(br, port, &br_group); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, const struct in6_addr *group) @@ -783,7 +783,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, if (!ipv6_is_transient_multicast(group)) return 0; - ipv6_addr_copy(&br_group.u.ip6, group); + br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); return br_multicast_add_group(br, port, &br_group); @@ -845,7 +845,7 @@ static void br_multicast_send_query(struct net_bridge *br, br_group.proto = htons(ETH_P_IP); __br_multicast_send_query(br, port, &br_group); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) br_group.proto = htons(ETH_P_IPV6); __br_multicast_send_query(br, port, &br_group); #endif @@ -989,7 +989,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, return err; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_mld2_report(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) @@ -1185,7 +1185,7 @@ out: return err; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_query(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) @@ -1334,7 +1334,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, br_multicast_leave_group(br, port, &br_group); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, const struct in6_addr *group) @@ -1344,7 +1344,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, if (!ipv6_is_transient_multicast(group)) return; - ipv6_addr_copy(&br_group.u.ip6, group); + br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_multicast_leave_group(br, port, &br_group); @@ -1449,7 +1449,7 @@ err_out: return err; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) @@ -1458,6 +1458,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, const struct ipv6hdr *ip6h; u8 icmp6_type; u8 nexthdr; + __be16 frag_off; unsigned len; int offset; int err; @@ -1483,7 +1484,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, return -EINVAL; nexthdr = ip6h->nexthdr; - offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr); + offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off); if (offset < 0 || nexthdr != IPPROTO_ICMPV6) return 0; @@ -1501,6 +1502,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, __skb_pull(skb2, offset); skb_reset_transport_header(skb2); + skb_postpull_rcsum(skb2, skb_network_header(skb2), + skb_network_header_len(skb2)); icmp6_type = icmp6_hdr(skb2)->icmp6_type; @@ -1593,7 +1596,7 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, switch (skb->protocol) { case htons(ETH_P_IP): return br_multicast_ipv4_rcv(br, port, skb); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return br_multicast_ipv6_rcv(br, port, skb); #endif @@ -1770,7 +1773,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) int err = 0; struct net_bridge_mdb_htable *mdb; - spin_lock(&br->multicast_lock); + spin_lock_bh(&br->multicast_lock); if (br->multicast_disabled == !val) goto unlock; @@ -1806,7 +1809,7 @@ rollback: } unlock: - spin_unlock(&br->multicast_lock); + spin_unlock_bh(&br->multicast_lock); return err; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index d6ec3720c77e..84122472656c 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -114,12 +114,18 @@ static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const vo return NULL; } +static unsigned int fake_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + static struct dst_ops fake_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, .cow_metrics = fake_cow_metrics, .neigh_lookup = fake_neigh_lookup, + .mtu = fake_mtu, }; /* @@ -141,7 +147,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) rt->dst.dev = br->dev; rt->dst.path = &rt->dst; dst_init_metrics(&rt->dst, br_dst_default_metrics, true); - rt->dst.flags = DST_NOXFRM; + rt->dst.flags = DST_NOXFRM | DST_NOPEER; rt->dst.ops = &fake_dst_ops; } @@ -356,7 +362,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) if (!skb->dev) goto free_skb; dst = skb_dst(skb); - neigh = dst_get_neighbour(dst); + neigh = dst_get_neighbour_noref(dst); if (neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; @@ -807,7 +813,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } -#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e5f9ece3c9a0..a1daf8227ed1 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -18,6 +18,7 @@ #include <net/sock.h> #include "br_private.h" +#include "br_private_stp.h" static inline size_t br_nlmsg_size(void) { @@ -188,6 +189,11 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) p->state = new_state; br_log_state(p); + + spin_lock_bh(&p->br->lock); + br_port_state_selection(p->br); + spin_unlock_bh(&p->br->lock); + br_ifinfo_notify(RTM_NEWLINK, p); return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 857a021deea9..0b67a63ad7a8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -29,6 +29,11 @@ #define BR_VERSION "2.3" +/* Control of forwarding link local multicast */ +#define BR_GROUPFWD_DEFAULT 0 +/* Don't allow forwarding control protocols like STP and LLDP */ +#define BR_GROUPFWD_RESTRICTED 0x4007u + /* Path to usermode spanning tree program */ #define BR_STP_PROG "/sbin/bridge-stp" @@ -51,7 +56,7 @@ struct br_ip { union { __be32 ip4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct in6_addr ip6; #endif } u; @@ -193,6 +198,8 @@ struct net_bridge unsigned long flags; #define BR_SET_MAC_ADDR 0x00000001 + u16 group_fwd_mask; + /* STP */ bridge_id designated_root; bridge_id bridge_id; @@ -341,6 +348,7 @@ extern void br_fdb_fini(void); extern void br_fdb_flush(struct net_bridge *br); extern void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); +extern void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); extern void br_fdb_cleanup(unsigned long arg); extern void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, int do_all); @@ -380,7 +388,8 @@ extern int br_add_if(struct net_bridge *br, extern int br_del_if(struct net_bridge *br, struct net_device *dev); extern int br_min_mtu(const struct net_bridge *br); -extern u32 br_features_recompute(struct net_bridge *br, u32 features); +extern netdev_features_t br_features_recompute(struct net_bridge *br, + netdev_features_t features); /* br_input.c */ extern int br_handle_frame_finish(struct sk_buff *skb); @@ -528,7 +537,7 @@ extern void br_stp_port_timer_init(struct net_bridge_port *p); extern unsigned long br_timer_value(const struct timer_list *timer); /* br.c */ -#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) +#if IS_ENABLED(CONFIG_ATM_LANE) extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr); #endif diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index ad0a3f7cf6cc..dd147d78a588 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -399,25 +399,24 @@ void br_port_state_selection(struct net_bridge *br) struct net_bridge_port *p; unsigned int liveports = 0; - /* Don't change port states if userspace is handling STP */ - if (br->stp_enabled == BR_USER_STP) - return; - list_for_each_entry(p, &br->port_list, list) { if (p->state == BR_STATE_DISABLED) continue; - if (p->port_no == br->root_port) { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_forwarding(p); - } else if (br_is_designated_port(p)) { - del_timer(&p->message_age_timer); - br_make_forwarding(p); - } else { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_blocking(p); + /* Don't change port states if userspace is handling STP */ + if (br->stp_enabled != BR_USER_STP) { + if (p->port_no == br->root_port) { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_forwarding(p); + } else if (br_is_designated_port(p)) { + del_timer(&p->message_age_timer); + br_make_forwarding(p); + } else { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_blocking(p); + } } if (p->state == BR_STATE_FORWARDING) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 10eda3cd1d71..19308e305d85 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -12,6 +12,7 @@ */ #include <linux/kernel.h> +#include <linux/kmod.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 68b893ea8c3a..c236c0e43984 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -149,6 +149,39 @@ static ssize_t store_stp_state(struct device *d, static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, store_stp_state); +static ssize_t show_group_fwd_mask(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%#x\n", br->group_fwd_mask); +} + + +static ssize_t store_group_fwd_mask(struct device *d, + struct device_attribute *attr, const char *buf, + size_t len) +{ + struct net_bridge *br = to_bridge(d); + char *endp; + unsigned long val; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + + if (val & BR_GROUPFWD_RESTRICTED) + return -EINVAL; + + br->group_fwd_mask = val; + + return len; +} +static DEVICE_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, + store_group_fwd_mask); + static ssize_t show_priority(struct device *d, struct device_attribute *attr, char *buf) { @@ -652,6 +685,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_max_age.attr, &dev_attr_ageing_time.attr, &dev_attr_stp_state.attr, + &dev_attr_group_fwd_mask.attr, &dev_attr_priority.attr, &dev_attr_bridge_id.attr, &dev_attr_root_id.attr, diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 2ed0056a39a8..99c85668f551 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -55,9 +55,10 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; + __be16 frag_off; int offset_ph; - offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr); + offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off); if (offset_ph == -1) return false; if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 6e5a8bb9b940..f88ee537fb2b 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -107,12 +107,13 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, goto out; } -#if defined(CONFIG_BRIDGE_EBT_IP6) || defined(CONFIG_BRIDGE_EBT_IP6_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE_EBT_IP6) if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto == htons(ETH_P_IPV6)) { const struct ipv6hdr *ih; struct ipv6hdr _iph; uint8_t nexthdr; + __be16 frag_off; int offset_ph; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); @@ -123,7 +124,7 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); nexthdr = ih->nexthdr; - offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr); + offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off); if (offset_ph == -1) goto out; print_ports(skb, nexthdr, offset_ph); diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index bf2a333ca7c7..5449294bdd5e 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -102,16 +102,15 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) unsigned int n; n = max(size, nlbufsiz); - skb = alloc_skb(n, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); if (!skb) { - pr_debug("cannot alloc whole buffer of size %ub!\n", n); if (n > size) { /* try to allocate only as much as we need for * current packet */ skb = alloc_skb(size, GFP_ATOMIC); if (!skb) - pr_debug("cannot even allocate " - "buffer of size %ub\n", size); + pr_debug("cannot even allocate buffer of size %ub\n", + size); } } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 1bcaf36ad612..40d8258bf74f 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -87,14 +87,14 @@ static int __init ebtable_broute_init(void) if (ret < 0) return ret; /* see br_input.c */ - rcu_assign_pointer(br_should_route_hook, + RCU_INIT_POINTER(br_should_route_hook, (br_should_route_hook_t *)ebt_broute); return 0; } static void __exit ebtable_broute_fini(void) { - rcu_assign_pointer(br_should_route_hook, NULL); + RCU_INIT_POINTER(br_should_route_hook, NULL); synchronize_net(); unregister_pernet_subsys(&broute_net_ops); } diff --git a/net/caif/Kconfig b/net/caif/Kconfig index 529750da9624..936361e5a2b6 100644 --- a/net/caif/Kconfig +++ b/net/caif/Kconfig @@ -40,3 +40,14 @@ config CAIF_NETDEV If you select to build it as a built-in then the main CAIF device must also be a built-in. If unsure say Y. + +config CAIF_USB + tristate "CAIF USB support" + depends on CAIF + default n + ---help--- + Say Y if you are using CAIF over USB CDC NCM. + This can be either built-in or a loadable module, + If you select to build it as a built-in then the main CAIF device must + also be a built-in. + If unsure say N. diff --git a/net/caif/Makefile b/net/caif/Makefile index ebcd4e7e6f47..cc2b51154d03 100644 --- a/net/caif/Makefile +++ b/net/caif/Makefile @@ -10,5 +10,6 @@ caif-y := caif_dev.o \ obj-$(CONFIG_CAIF) += caif.o obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o obj-$(CONFIG_CAIF) += caif_socket.o +obj-$(CONFIG_CAIF_USB) += caif_usb.o export-y := caif.o diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 7f9ac0742d19..61570ee76fe6 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -16,6 +16,8 @@ #include <linux/net.h> #include <linux/netdevice.h> #include <linux/mutex.h> +#include <linux/module.h> +#include <linux/spinlock.h> #include <net/netns/generic.h> #include <net/net_namespace.h> #include <net/pkt_sched.h> @@ -23,6 +25,7 @@ #include <net/caif/caif_layer.h> #include <net/caif/cfpkt.h> #include <net/caif/cfcnfg.h> +#include <net/caif/cfserl.h> MODULE_LICENSE("GPL"); @@ -32,6 +35,10 @@ struct caif_device_entry { struct list_head list; struct net_device *netdev; int __percpu *pcpu_refcnt; + spinlock_t flow_lock; + struct sk_buff *xoff_skb; + void (*xoff_skb_dtor)(struct sk_buff *skb); + bool xoff; }; struct caif_device_entry_list { @@ -46,13 +53,14 @@ struct caif_net { }; static int caif_net_id; +static int q_high = 50; /* Percent */ struct cfcnfg *get_cfcnfg(struct net *net) { struct caif_net *caifn; - BUG_ON(!net); caifn = net_generic(net, caif_net_id); - BUG_ON(!caifn); + if (!caifn) + return NULL; return caifn->cfg; } EXPORT_SYMBOL(get_cfcnfg); @@ -60,20 +68,20 @@ EXPORT_SYMBOL(get_cfcnfg); static struct caif_device_entry_list *caif_device_list(struct net *net) { struct caif_net *caifn; - BUG_ON(!net); caifn = net_generic(net, caif_net_id); - BUG_ON(!caifn); + if (!caifn) + return NULL; return &caifn->caifdevs; } static void caifd_put(struct caif_device_entry *e) { - irqsafe_cpu_dec(*e->pcpu_refcnt); + this_cpu_dec(*e->pcpu_refcnt); } static void caifd_hold(struct caif_device_entry *e) { - irqsafe_cpu_inc(*e->pcpu_refcnt); + this_cpu_inc(*e->pcpu_refcnt); } static int caifd_refcnt_read(struct caif_device_entry *e) @@ -91,7 +99,8 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev) struct caif_device_entry *caifd; caifdevs = caif_device_list(dev_net(dev)); - BUG_ON(!caifdevs); + if (!caifdevs) + return NULL; caifd = kzalloc(sizeof(*caifd), GFP_KERNEL); if (!caifd) @@ -111,7 +120,9 @@ static struct caif_device_entry *caif_get(struct net_device *dev) struct caif_device_entry_list *caifdevs = caif_device_list(dev_net(dev)); struct caif_device_entry *caifd; - BUG_ON(!caifdevs); + if (!caifdevs) + return NULL; + list_for_each_entry_rcu(caifd, &caifdevs->list, list) { if (caifd->netdev == dev) return caifd; @@ -119,15 +130,106 @@ static struct caif_device_entry *caif_get(struct net_device *dev) return NULL; } +void caif_flow_cb(struct sk_buff *skb) +{ + struct caif_device_entry *caifd; + void (*dtor)(struct sk_buff *skb) = NULL; + bool send_xoff; + + WARN_ON(skb->dev == NULL); + + rcu_read_lock(); + caifd = caif_get(skb->dev); + caifd_hold(caifd); + rcu_read_unlock(); + + spin_lock_bh(&caifd->flow_lock); + send_xoff = caifd->xoff; + caifd->xoff = 0; + if (!WARN_ON(caifd->xoff_skb_dtor == NULL)) { + WARN_ON(caifd->xoff_skb != skb); + dtor = caifd->xoff_skb_dtor; + caifd->xoff_skb = NULL; + caifd->xoff_skb_dtor = NULL; + } + spin_unlock_bh(&caifd->flow_lock); + + if (dtor) + dtor(skb); + + if (send_xoff) + caifd->layer.up-> + ctrlcmd(caifd->layer.up, + _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND, + caifd->layer.id); + caifd_put(caifd); +} + static int transmit(struct cflayer *layer, struct cfpkt *pkt) { - int err; + int err, high = 0, qlen = 0; + struct caif_dev_common *caifdev; struct caif_device_entry *caifd = container_of(layer, struct caif_device_entry, layer); struct sk_buff *skb; + struct netdev_queue *txq; + + rcu_read_lock_bh(); skb = cfpkt_tonative(pkt); skb->dev = caifd->netdev; + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_CAIF); + caifdev = netdev_priv(caifd->netdev); + + /* Check if we need to handle xoff */ + if (likely(caifd->netdev->tx_queue_len == 0)) + goto noxoff; + + if (unlikely(caifd->xoff)) + goto noxoff; + + if (likely(!netif_queue_stopped(caifd->netdev))) { + /* If we run with a TX queue, check if the queue is too long*/ + txq = netdev_get_tx_queue(skb->dev, 0); + qlen = qdisc_qlen(rcu_dereference_bh(txq->qdisc)); + + if (likely(qlen == 0)) + goto noxoff; + + high = (caifd->netdev->tx_queue_len * q_high) / 100; + if (likely(qlen < high)) + goto noxoff; + } + + /* Hold lock while accessing xoff */ + spin_lock_bh(&caifd->flow_lock); + if (caifd->xoff) { + spin_unlock_bh(&caifd->flow_lock); + goto noxoff; + } + + /* + * Handle flow off, we do this by temporary hi-jacking this + * skb's destructor function, and replace it with our own + * flow-on callback. The callback will set flow-on and call + * the original destructor. + */ + + pr_debug("queue has stopped(%d) or is full (%d > %d)\n", + netif_queue_stopped(caifd->netdev), + qlen, high); + caifd->xoff = 1; + caifd->xoff_skb = skb; + caifd->xoff_skb_dtor = skb->destructor; + skb->destructor = caif_flow_cb; + spin_unlock_bh(&caifd->flow_lock); + + caifd->layer.up->ctrlcmd(caifd->layer.up, + _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND, + caifd->layer.id); +noxoff: + rcu_read_unlock_bh(); err = dev_queue_xmit(skb); if (err > 0) @@ -171,7 +273,10 @@ static int receive(struct sk_buff *skb, struct net_device *dev, /* Release reference to stack upwards */ caifd_put(caifd); - return 0; + + if (err != 0) + err = NET_RX_DROP; + return err; } static struct packet_type caif_packet_type __read_mostly = { @@ -202,6 +307,57 @@ static void dev_flowctrl(struct net_device *dev, int on) caifd_put(caifd); } +void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, + struct cflayer *link_support, int head_room, + struct cflayer **layer, int (**rcv_func)( + struct sk_buff *, struct net_device *, + struct packet_type *, struct net_device *)) +{ + struct caif_device_entry *caifd; + enum cfcnfg_phy_preference pref; + struct cfcnfg *cfg = get_cfcnfg(dev_net(dev)); + struct caif_device_entry_list *caifdevs; + + caifdevs = caif_device_list(dev_net(dev)); + if (!cfg || !caifdevs) + return; + caifd = caif_device_alloc(dev); + if (!caifd) + return; + *layer = &caifd->layer; + spin_lock_init(&caifd->flow_lock); + + switch (caifdev->link_select) { + case CAIF_LINK_HIGH_BANDW: + pref = CFPHYPREF_HIGH_BW; + break; + case CAIF_LINK_LOW_LATENCY: + pref = CFPHYPREF_LOW_LAT; + break; + default: + pref = CFPHYPREF_HIGH_BW; + break; + } + mutex_lock(&caifdevs->lock); + list_add_rcu(&caifd->list, &caifdevs->list); + + strncpy(caifd->layer.name, dev->name, + sizeof(caifd->layer.name) - 1); + caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0; + caifd->layer.transmit = transmit; + cfcnfg_add_phy_layer(cfg, + dev, + &caifd->layer, + pref, + link_support, + caifdev->use_fcs, + head_room); + mutex_unlock(&caifdevs->lock); + if (rcv_func) + *rcv_func = receive; +} +EXPORT_SYMBOL(caif_enroll_dev); + /* notify Caif of device events */ static int caif_device_notify(struct notifier_block *me, unsigned long what, void *arg) @@ -209,61 +365,40 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, struct net_device *dev = arg; struct caif_device_entry *caifd = NULL; struct caif_dev_common *caifdev; - enum cfcnfg_phy_preference pref; - enum cfcnfg_phy_type phy_type; struct cfcnfg *cfg; - struct caif_device_entry_list *caifdevs = - caif_device_list(dev_net(dev)); + struct cflayer *layer, *link_support; + int head_room = 0; + struct caif_device_entry_list *caifdevs; - if (dev->type != ARPHRD_CAIF) + cfg = get_cfcnfg(dev_net(dev)); + caifdevs = caif_device_list(dev_net(dev)); + if (!cfg || !caifdevs) return 0; - cfg = get_cfcnfg(dev_net(dev)); - if (cfg == NULL) + caifd = caif_get(dev); + if (caifd == NULL && dev->type != ARPHRD_CAIF) return 0; switch (what) { case NETDEV_REGISTER: - caifd = caif_device_alloc(dev); - if (!caifd) - return 0; + if (caifd != NULL) + break; caifdev = netdev_priv(dev); - caifdev->flowctrl = dev_flowctrl; - caifd->layer.transmit = transmit; - - if (caifdev->use_frag) - phy_type = CFPHYTYPE_FRAG; - else - phy_type = CFPHYTYPE_CAIF; - - switch (caifdev->link_select) { - case CAIF_LINK_HIGH_BANDW: - pref = CFPHYPREF_HIGH_BW; - break; - case CAIF_LINK_LOW_LATENCY: - pref = CFPHYPREF_LOW_LAT; - break; - default: - pref = CFPHYPREF_HIGH_BW; - break; + link_support = NULL; + if (caifdev->use_frag) { + head_room = 1; + link_support = cfserl_create(dev->ifindex, + caifdev->use_stx); + if (!link_support) { + pr_warn("Out of memory\n"); + break; + } } - strncpy(caifd->layer.name, dev->name, - sizeof(caifd->layer.name) - 1); - caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0; - - mutex_lock(&caifdevs->lock); - list_add_rcu(&caifd->list, &caifdevs->list); - - cfcnfg_add_phy_layer(cfg, - phy_type, - dev, - &caifd->layer, - pref, - caifdev->use_fcs, - caifdev->use_stx); - mutex_unlock(&caifdevs->lock); + caif_enroll_dev(dev, caifdev, link_support, head_room, + &layer, NULL); + caifdev->flowctrl = dev_flowctrl; break; case NETDEV_UP: @@ -275,6 +410,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, break; } + caifd->xoff = 0; cfcnfg_set_phy_state(cfg, &caifd->layer, true); rcu_read_unlock(); @@ -296,6 +432,24 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, caifd->layer.up->ctrlcmd(caifd->layer.up, _CAIF_CTRLCMD_PHYIF_DOWN_IND, caifd->layer.id); + + spin_lock_bh(&caifd->flow_lock); + + /* + * Replace our xoff-destructor with original destructor. + * We trust that skb->destructor *always* is called before + * the skb reference is invalid. The hijacked SKB destructor + * takes the flow_lock so manipulating the skb->destructor here + * should be safe. + */ + if (caifd->xoff_skb_dtor != NULL && caifd->xoff_skb != NULL) + caifd->xoff_skb->destructor = caifd->xoff_skb_dtor; + + caifd->xoff = 0; + caifd->xoff_skb_dtor = NULL; + caifd->xoff_skb = NULL; + + spin_unlock_bh(&caifd->flow_lock); caifd_put(caifd); break; @@ -351,15 +505,15 @@ static struct notifier_block caif_device_notifier = { static int caif_init_net(struct net *net) { struct caif_net *caifn = net_generic(net, caif_net_id); - BUG_ON(!caifn); + if (WARN_ON(!caifn)) + return -EINVAL; + INIT_LIST_HEAD(&caifn->caifdevs.list); mutex_init(&caifn->caifdevs.lock); caifn->cfg = cfcnfg_create(); - if (!caifn->cfg) { - pr_warn("can't create cfcnfg\n"); + if (!caifn->cfg) return -ENOMEM; - } return 0; } @@ -369,17 +523,14 @@ static void caif_exit_net(struct net *net) struct caif_device_entry *caifd, *tmp; struct caif_device_entry_list *caifdevs = caif_device_list(net); - struct cfcnfg *cfg; + struct cfcnfg *cfg = get_cfcnfg(net); + + if (!cfg || !caifdevs) + return; rtnl_lock(); mutex_lock(&caifdevs->lock); - cfg = get_cfcnfg(net); - if (cfg == NULL) { - mutex_unlock(&caifdevs->lock); - return; - } - list_for_each_entry_safe(caifd, tmp, &caifdevs->list, list) { int i = 0; list_del_rcu(&caifd->list); diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c new file mode 100644 index 000000000000..5fc9eca8cd41 --- /dev/null +++ b/net/caif/caif_usb.c @@ -0,0 +1,208 @@ +/* + * CAIF USB handler + * Copyright (C) ST-Ericsson AB 2011 + * Author: Sjur Brendeland/sjur.brandeland@stericsson.com + * License terms: GNU General Public License (GPL) version 2 + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ + +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/mii.h> +#include <linux/usb.h> +#include <linux/usb/usbnet.h> +#include <net/netns/generic.h> +#include <net/caif/caif_dev.h> +#include <net/caif/caif_layer.h> +#include <net/caif/cfpkt.h> +#include <net/caif/cfcnfg.h> + +MODULE_LICENSE("GPL"); + +#define CFUSB_PAD_DESCR_SZ 1 /* Alignment descriptor length */ +#define CFUSB_ALIGNMENT 4 /* Number of bytes to align. */ +#define CFUSB_MAX_HEADLEN (CFUSB_PAD_DESCR_SZ + CFUSB_ALIGNMENT-1) +#define STE_USB_VID 0x04cc /* USB Product ID for ST-Ericsson */ +#define STE_USB_PID_CAIF 0x2306 /* Product id for CAIF Modems */ + +struct cfusbl { + struct cflayer layer; + u8 tx_eth_hdr[ETH_HLEN]; +}; + +static bool pack_added; + +static int cfusbl_receive(struct cflayer *layr, struct cfpkt *pkt) +{ + u8 hpad; + + /* Remove padding. */ + cfpkt_extr_head(pkt, &hpad, 1); + cfpkt_extr_head(pkt, NULL, hpad); + return layr->up->receive(layr->up, pkt); +} + +static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt) +{ + struct caif_payload_info *info; + u8 hpad; + u8 zeros[CFUSB_ALIGNMENT]; + struct sk_buff *skb; + struct cfusbl *usbl = container_of(layr, struct cfusbl, layer); + + skb = cfpkt_tonative(pkt); + + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + + info = cfpkt_info(pkt); + hpad = (info->hdr_len + CFUSB_PAD_DESCR_SZ) & (CFUSB_ALIGNMENT - 1); + + if (skb_headroom(skb) < ETH_HLEN + CFUSB_PAD_DESCR_SZ + hpad) { + pr_warn("Headroom to small\n"); + kfree_skb(skb); + return -EIO; + } + memset(zeros, 0, hpad); + + cfpkt_add_head(pkt, zeros, hpad); + cfpkt_add_head(pkt, &hpad, 1); + cfpkt_add_head(pkt, usbl->tx_eth_hdr, sizeof(usbl->tx_eth_hdr)); + return layr->dn->transmit(layr->dn, pkt); +} + +static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, + int phyid) +{ + if (layr->up && layr->up->ctrlcmd) + layr->up->ctrlcmd(layr->up, ctrl, layr->id); +} + +struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], + u8 braddr[ETH_ALEN]) +{ + struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC); + + if (!this) { + pr_warn("Out of memory\n"); + return NULL; + } + caif_assert(offsetof(struct cfusbl, layer) == 0); + + memset(this, 0, sizeof(struct cflayer)); + this->layer.receive = cfusbl_receive; + this->layer.transmit = cfusbl_transmit; + this->layer.ctrlcmd = cfusbl_ctrlcmd; + snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "usb%d", phyid); + this->layer.id = phyid; + + /* + * Construct TX ethernet header: + * 0-5 destination address + * 5-11 source address + * 12-13 protocol type + */ + memcpy(&this->tx_eth_hdr[ETH_ALEN], braddr, ETH_ALEN); + memcpy(&this->tx_eth_hdr[ETH_ALEN], ethaddr, ETH_ALEN); + this->tx_eth_hdr[12] = cpu_to_be16(ETH_P_802_EX1) & 0xff; + this->tx_eth_hdr[13] = (cpu_to_be16(ETH_P_802_EX1) >> 8) & 0xff; + pr_debug("caif ethernet TX-header dst:%pM src:%pM type:%02x%02x\n", + this->tx_eth_hdr, this->tx_eth_hdr + ETH_ALEN, + this->tx_eth_hdr[12], this->tx_eth_hdr[13]); + + return (struct cflayer *) this; +} + +static struct packet_type caif_usb_type __read_mostly = { + .type = cpu_to_be16(ETH_P_802_EX1), +}; + +static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, + void *arg) +{ + struct net_device *dev = arg; + struct caif_dev_common common; + struct cflayer *layer, *link_support; + struct usbnet *usbnet = netdev_priv(dev); + struct usb_device *usbdev = usbnet->udev; + struct ethtool_drvinfo drvinfo; + + /* + * Quirks: High-jack ethtool to find if we have a NCM device, + * and find it's VID/PID. + */ + if (dev->ethtool_ops == NULL || dev->ethtool_ops->get_drvinfo == NULL) + return 0; + + dev->ethtool_ops->get_drvinfo(dev, &drvinfo); + if (strncmp(drvinfo.driver, "cdc_ncm", 7) != 0) + return 0; + + pr_debug("USB CDC NCM device VID:0x%4x PID:0x%4x\n", + le16_to_cpu(usbdev->descriptor.idVendor), + le16_to_cpu(usbdev->descriptor.idProduct)); + + /* Check for VID/PID that supports CAIF */ + if (!(le16_to_cpu(usbdev->descriptor.idVendor) == STE_USB_VID && + le16_to_cpu(usbdev->descriptor.idProduct) == STE_USB_PID_CAIF)) + return 0; + + if (what == NETDEV_UNREGISTER) + module_put(THIS_MODULE); + + if (what != NETDEV_REGISTER) + return 0; + + __module_get(THIS_MODULE); + + memset(&common, 0, sizeof(common)); + common.use_frag = false; + common.use_fcs = false; + common.use_stx = false; + common.link_select = CAIF_LINK_HIGH_BANDW; + common.flowctrl = NULL; + + link_support = cfusbl_create(dev->ifindex, dev->dev_addr, + dev->broadcast); + + if (!link_support) + return -ENOMEM; + + if (dev->num_tx_queues > 1) + pr_warn("USB device uses more than one tx queue\n"); + + caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN, + &layer, &caif_usb_type.func); + if (!pack_added) + dev_add_pack(&caif_usb_type); + pack_added = true; + + strncpy(layer->name, dev->name, + sizeof(layer->name) - 1); + layer->name[sizeof(layer->name) - 1] = 0; + + return 0; +} + +static struct notifier_block caif_device_notifier = { + .notifier_call = cfusbl_device_notify, + .priority = 0, +}; + +static int __init cfusbl_init(void) +{ + return register_netdevice_notifier(&caif_device_notifier); +} + +static void __exit cfusbl_exit(void) +{ + unregister_netdevice_notifier(&caif_device_notifier); + dev_remove_pack(&caif_usb_type); +} + +module_init(cfusbl_init); +module_exit(cfusbl_exit); diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 52fe33bee029..598aafb4cb51 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -45,8 +45,8 @@ struct cfcnfg_phyinfo { /* Interface index */ int ifindex; - /* Use Start of frame extension */ - bool use_stx; + /* Protocol head room added for CAIF link layer */ + int head_room; /* Use Start of frame checksum */ bool use_fcs; @@ -78,10 +78,8 @@ struct cfcnfg *cfcnfg_create(void) /* Initiate this layer */ this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + if (!this) return NULL; - } this->mux = cfmuxl_create(); if (!this->mux) goto out_of_mem; @@ -108,8 +106,6 @@ struct cfcnfg *cfcnfg_create(void) return this; out_of_mem: - pr_warn("Out of memory\n"); - synchronize_rcu(); kfree(this->mux); @@ -191,11 +187,11 @@ int caif_disconnect_client(struct net *net, struct cflayer *adap_layer) if (channel_id != 0) { struct cflayer *servl; servl = cfmuxl_remove_uplayer(cfg->mux, channel_id); + cfctrl_linkdown_req(cfg->ctrl, channel_id, adap_layer); if (servl != NULL) layer_set_up(servl, NULL); } else pr_debug("nothing to disconnect\n"); - cfctrl_linkdown_req(cfg->ctrl, channel_id, adap_layer); /* Do RCU sync before initiating cleanup */ synchronize_rcu(); @@ -354,9 +350,7 @@ int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, *ifindex = phy->ifindex; *proto_tail = 2; - *proto_head = - - protohead[param.linktype] + (phy->use_stx ? 1 : 0); + *proto_head = protohead[param.linktype] + phy->head_room; rcu_read_unlock(); @@ -448,10 +442,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, "- unknown channel type\n"); goto unlock; } - if (!servicel) { - pr_warn("Out of memory\n"); + if (!servicel) goto unlock; - } layer_set_dn(servicel, cnfg->mux); cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); layer_set_up(servicel, adapt_layer); @@ -466,14 +458,14 @@ unlock: } void -cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, +cfcnfg_add_phy_layer(struct cfcnfg *cnfg, struct net_device *dev, struct cflayer *phy_layer, enum cfcnfg_phy_preference pref, - bool fcs, bool stx) + struct cflayer *link_support, + bool fcs, int head_room) { struct cflayer *frml; - struct cflayer *phy_driver = NULL; - struct cfcnfg_phyinfo *phyinfo; + struct cfcnfg_phyinfo *phyinfo = NULL; int i; u8 phyid; @@ -492,22 +484,9 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, got_phyid: phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC); + if (!phyinfo) + goto out_err; - switch (phy_type) { - case CFPHYTYPE_FRAG: - phy_driver = - cfserl_create(CFPHYTYPE_FRAG, phyid, stx); - if (!phy_driver) { - pr_warn("Out of memory\n"); - goto out; - } - break; - case CFPHYTYPE_CAIF: - phy_driver = NULL; - break; - default: - goto out; - } phy_layer->id = phyid; phyinfo->pref = pref; phyinfo->id = phyid; @@ -515,25 +494,22 @@ got_phyid: phyinfo->dev_info.dev = dev; phyinfo->phy_layer = phy_layer; phyinfo->ifindex = dev->ifindex; - phyinfo->use_stx = stx; + phyinfo->head_room = head_room; phyinfo->use_fcs = fcs; frml = cffrml_create(phyid, fcs); - if (!frml) { - pr_warn("Out of memory\n"); - kfree(phyinfo); - goto out; - } + if (!frml) + goto out_err; phyinfo->frm_layer = frml; layer_set_up(frml, cnfg->mux); - if (phy_driver != NULL) { - phy_driver->id = phyid; - layer_set_dn(frml, phy_driver); - layer_set_up(phy_driver, frml); - layer_set_dn(phy_driver, phy_layer); - layer_set_up(phy_layer, phy_driver); + if (link_support != NULL) { + link_support->id = phyid; + layer_set_dn(frml, link_support); + layer_set_up(link_support, frml); + layer_set_dn(link_support, phy_layer); + layer_set_up(phy_layer, link_support); } else { layer_set_dn(frml, phy_layer); layer_set_up(phy_layer, frml); @@ -542,6 +518,11 @@ got_phyid: list_add_rcu(&phyinfo->node, &cnfg->phys); out: mutex_unlock(&cnfg->lock); + return; + +out_err: + kfree(phyinfo); + mutex_unlock(&cnfg->lock); } EXPORT_SYMBOL(cfcnfg_add_phy_layer); diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index e22671bed669..5cf52225692e 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -35,15 +35,12 @@ struct cflayer *cfctrl_create(void) { struct dev_info dev_info; struct cfctrl *this = - kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + kzalloc(sizeof(struct cfctrl), GFP_ATOMIC); + if (!this) return NULL; - } caif_assert(offsetof(struct cfctrl, serv.layer) == 0); memset(&dev_info, 0, sizeof(dev_info)); dev_info.id = 0xff; - memset(this, 0, sizeof(*this)); cfsrvl_init(&this->serv, 0, &dev_info, false); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); @@ -180,10 +177,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) struct cfctrl *cfctrl = container_obj(layer); struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); struct cflayer *dn = cfctrl->serv.layer.dn; - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return; - } if (!dn) { pr_debug("not able to send enum request\n"); return; @@ -224,10 +219,8 @@ int cfctrl_linkup_request(struct cflayer *layer, } pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype); cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid); @@ -275,10 +268,8 @@ int cfctrl_linkup_request(struct cflayer *layer, return -EINVAL; } req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) { - pr_warn("Out of memory\n"); + if (!req) return -ENOMEM; - } req->client_layer = user_layer; req->cmd = CFCTRL_CMD_LINK_SETUP; req->param = *param; @@ -312,10 +303,8 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); struct cflayer *dn = cfctrl->serv.layer.dn; - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } if (!dn) { pr_debug("not able to send link-down request\n"); diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index 11a2af4c162a..65d6ef3cf9aa 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -19,13 +19,10 @@ static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!dbg) { - pr_warn("Out of memory\n"); + struct cfsrvl *dbg = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!dbg) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(dbg, 0, sizeof(struct cfsrvl)); cfsrvl_init(dbg, channel_id, dev_info, false); dbg->layer.receive = cfdbgl_receive; dbg->layer.transmit = cfdbgl_transmit; diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index 0382dec84fdc..0f5ff27aa41c 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -26,13 +26,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!dgm) { - pr_warn("Out of memory\n"); + struct cfsrvl *dgm = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!dgm) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(dgm, 0, sizeof(struct cfsrvl)); cfsrvl_init(dgm, channel_id, dev_info, true); dgm->layer.receive = cfdgml_receive; dgm->layer.transmit = cfdgml_transmit; diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 04204b202718..0a7df7ef062d 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -34,11 +34,9 @@ static u32 cffrml_rcv_error; static u32 cffrml_rcv_checsum_error; struct cflayer *cffrml_create(u16 phyid, bool use_fcs) { - struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + struct cffrml *this = kzalloc(sizeof(struct cffrml), GFP_ATOMIC); + if (!this) return NULL; - } this->pcpu_refcnt = alloc_percpu(int); if (this->pcpu_refcnt == NULL) { kfree(this); @@ -47,7 +45,6 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs) caif_assert(offsetof(struct cffrml, layer) == 0); - memset(this, 0, sizeof(struct cflayer)); this->layer.receive = cffrml_receive; this->layer.transmit = cffrml_transmit; this->layer.ctrlcmd = cffrml_ctrlcmd; @@ -139,20 +136,21 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) { - int tmp; u16 chks; u16 len; + __le16 data; + struct cffrml *this = container_obj(layr); if (this->dofcs) { chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff); - tmp = cpu_to_le16(chks); - cfpkt_add_trail(pkt, &tmp, 2); + data = cpu_to_le16(chks); + cfpkt_add_trail(pkt, &data, 2); } else { cfpkt_pad_trail(pkt, 2); } len = cfpkt_getlen(pkt); - tmp = cpu_to_le16(len); - cfpkt_add_head(pkt, &tmp, 2); + data = cpu_to_le16(len); + cfpkt_add_head(pkt, &data, 2); cfpkt_info(pkt)->hdr_len += 2; if (cfpkt_erroneous(pkt)) { pr_err("Packet is erroneous!\n"); @@ -179,14 +177,14 @@ void cffrml_put(struct cflayer *layr) { struct cffrml *this = container_obj(layr); if (layr != NULL && this->pcpu_refcnt != NULL) - irqsafe_cpu_dec(*this->pcpu_refcnt); + this_cpu_dec(*this->pcpu_refcnt); } void cffrml_hold(struct cflayer *layr) { struct cffrml *this = container_obj(layr); if (layr != NULL && this->pcpu_refcnt != NULL) - irqsafe_cpu_inc(*this->pcpu_refcnt); + this_cpu_inc(*this->pcpu_refcnt); } int cffrml_refcnt_read(struct cflayer *layr) diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index c23979e79dfa..b36f24a4c8e7 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -108,7 +108,7 @@ struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid) int idx = phyid % DN_CACHE_SIZE; spin_lock_bh(&muxl->transmit_lock); - rcu_assign_pointer(muxl->dn_cache[idx], NULL); + RCU_INIT_POINTER(muxl->dn_cache[idx], NULL); dn = get_from_id(&muxl->frml_list, phyid); if (dn == NULL) goto out; @@ -164,7 +164,7 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id) if (up == NULL) goto out; - rcu_assign_pointer(muxl->up_cache[idx], NULL); + RCU_INIT_POINTER(muxl->up_cache[idx], NULL); list_del_rcu(&up->node); out: spin_unlock_bh(&muxl->receive_lock); @@ -261,7 +261,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, idx = layer->id % UP_CACHE_SIZE; spin_lock_bh(&muxl->receive_lock); - rcu_assign_pointer(muxl->up_cache[idx], NULL); + RCU_INIT_POINTER(muxl->up_cache[idx], NULL); list_del_rcu(&layer->node); spin_unlock_bh(&muxl->receive_lock); } diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 75d4bfae1a78..e335ba859b97 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -9,6 +9,7 @@ #include <linux/string.h> #include <linux/skbuff.h> #include <linux/hardirq.h> +#include <linux/export.h> #include <net/caif/cfpkt.h> #define PKT_PREFIX 48 @@ -62,7 +63,6 @@ static inline struct cfpkt *skb_to_pkt(struct sk_buff *skb) return (struct cfpkt *) skb; } - struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt) { struct cfpkt *pkt = skb_to_pkt(nativepkt); @@ -104,14 +104,12 @@ void cfpkt_destroy(struct cfpkt *pkt) kfree_skb(skb); } - inline bool cfpkt_more(struct cfpkt *pkt) { struct sk_buff *skb = pkt_to_skb(pkt); return skb->len > 0; } - int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len) { struct sk_buff *skb = pkt_to_skb(pkt); @@ -143,9 +141,11 @@ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len) } from = skb_pull(skb, len); from -= len; - memcpy(data, from, len); + if (data) + memcpy(data, from, len); return 0; } +EXPORT_SYMBOL(cfpkt_extr_head); int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len) { @@ -169,13 +169,11 @@ int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len) return 0; } - int cfpkt_pad_trail(struct cfpkt *pkt, u16 len) { return cfpkt_add_body(pkt, NULL, len); } - int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len) { struct sk_buff *skb = pkt_to_skb(pkt); @@ -254,21 +252,19 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len) memcpy(to, data, len); return 0; } - +EXPORT_SYMBOL(cfpkt_add_head); inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len) { return cfpkt_add_body(pkt, data, len); } - inline u16 cfpkt_getlen(struct cfpkt *pkt) { struct sk_buff *skb = pkt_to_skb(pkt); return skb->len; } - inline u16 cfpkt_iterate(struct cfpkt *pkt, u16 (*iter_func)(u16, void *, u16), u16 data) @@ -286,7 +282,6 @@ inline u16 cfpkt_iterate(struct cfpkt *pkt, return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt)); } - int cfpkt_setlen(struct cfpkt *pkt, u16 len) { struct sk_buff *skb = pkt_to_skb(pkt); @@ -398,3 +393,4 @@ struct caif_payload_info *cfpkt_info(struct cfpkt *pkt) { return (struct caif_payload_info *)&pkt_to_skb(pkt)->cb; } +EXPORT_SYMBOL(cfpkt_info); diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index 0deabb440051..6dc75d4f8d94 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -46,13 +46,10 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, int mtu_size) { int tmp; - struct cfrfml *this = - kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); + struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + if (!this) return NULL; - } cfsrvl_init(&this->serv, channel_id, dev_info, false); this->serv.release = cfrfml_release; @@ -193,7 +190,7 @@ out: static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt) { - caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size); + caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size + RFM_HEAD_SIZE); /* Add info for MUX-layer to route the packet out. */ cfpkt_info(pkt)->channel_id = rfml->serv.layer.id; diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 2715c84cfa87..8e68b97f13ee 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -31,19 +31,15 @@ static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid); -struct cflayer *cfserl_create(int type, int instance, bool use_stx) +struct cflayer *cfserl_create(int instance, bool use_stx) { - struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC); + if (!this) return NULL; - } caif_assert(offsetof(struct cfserl, layer) == 0); - memset(this, 0, sizeof(struct cfserl)); this->layer.receive = cfserl_receive; this->layer.transmit = cfserl_transmit; this->layer.ctrlcmd = cfserl_ctrlcmd; - this->layer.type = type; this->usestx = use_stx; spin_lock_init(&this->sync); snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "ser1"); diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 535a1e72b366..b99f5b22689d 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -108,10 +108,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) struct caif_payload_info *info; u8 flow_on = SRVL_FLOW_ON; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { pr_err("Packet is erroneous!\n"); @@ -130,10 +128,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) struct caif_payload_info *info; u8 flow_off = SRVL_FLOW_OFF; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { pr_err("Packet is erroneous!\n"); diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 98e027db18ed..53e49f3e3af3 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -26,13 +26,10 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!util) { - pr_warn("Out of memory\n"); + struct cfsrvl *util = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!util) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(util, 0, sizeof(struct cfsrvl)); cfsrvl_init(util, channel_id, dev_info, true); util->layer.receive = cfutill_receive; util->layer.transmit = cfutill_transmit; diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 3ec83fbc2887..910ab0661f66 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -25,13 +25,10 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!vei) { - pr_warn("Out of memory\n"); + struct cfsrvl *vei = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!vei) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(vei, 0, sizeof(struct cfsrvl)); cfsrvl_init(vei, channel_id, dev_info, true); vei->layer.receive = cfvei_receive; vei->layer.transmit = cfvei_transmit; diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index b2f5989ad455..e3f37db40ac3 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -21,14 +21,11 @@ static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!vid) { - pr_warn("Out of memory\n"); + struct cfsrvl *vid = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!vid) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(vid, 0, sizeof(struct cfsrvl)); cfsrvl_init(vid, channel_id, dev_info, false); vid->layer.receive = cfvidl_receive; vid->layer.transmit = cfvidl_transmit; diff --git a/net/can/Kconfig b/net/can/Kconfig index 89395b2c8bca..03200699d274 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -40,5 +40,16 @@ config CAN_BCM CAN messages are used on the bus (e.g. in automotive environments). To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM. +config CAN_GW + tristate "CAN Gateway/Router (with netlink configuration)" + depends on CAN + default N + ---help--- + The CAN Gateway/Router is used to route (and modify) CAN frames. + It is based on the PF_CAN core infrastructure for msg filtering and + msg sending and can optionally modify routed CAN frames on the fly. + CAN frames can be routed between CAN network interfaces (one hop). + They can be modified with AND/OR/XOR/SET operations as configured + by the netlink configuration interface known e.g. from iptables. source "drivers/net/can/Kconfig" diff --git a/net/can/Makefile b/net/can/Makefile index 2d3894b32742..cef49eb1f5c7 100644 --- a/net/can/Makefile +++ b/net/can/Makefile @@ -10,3 +10,6 @@ can-raw-y := raw.o obj-$(CONFIG_CAN_BCM) += can-bcm.o can-bcm-y := bcm.o + +obj-$(CONFIG_CAN_GW) += can-gw.o +can-gw-y := gw.o diff --git a/net/can/af_can.c b/net/can/af_can.c index 9b0c32a2690c..0ce2ad0696da 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -38,8 +38,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <socketcan-users@lists.berlios.de> - * */ #include <linux/module.h> @@ -719,7 +717,7 @@ int can_proto_register(const struct can_proto *cp) proto); err = -EBUSY; } else - rcu_assign_pointer(proto_tab[proto], cp); + RCU_INIT_POINTER(proto_tab[proto], cp); mutex_unlock(&proto_tab_lock); @@ -740,7 +738,7 @@ void can_proto_unregister(const struct can_proto *cp) mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[proto] != cp); - rcu_assign_pointer(proto_tab[proto], NULL); + RCU_INIT_POINTER(proto_tab[proto], NULL); mutex_unlock(&proto_tab_lock); synchronize_rcu(); diff --git a/net/can/af_can.h b/net/can/af_can.h index 34253b84e30f..fd882dbadad3 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -35,8 +35,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <socketcan-users@lists.berlios.de> - * */ #ifndef AF_CAN_H diff --git a/net/can/bcm.c b/net/can/bcm.c index c84963d2dee6..151b7730c12c 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -37,8 +37,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <socketcan-users@lists.berlios.de> - * */ #include <linux/module.h> diff --git a/net/can/gw.c b/net/can/gw.c new file mode 100644 index 000000000000..3d79b127881e --- /dev/null +++ b/net/can/gw.c @@ -0,0 +1,957 @@ +/* + * gw.c - CAN frame Gateway/Router/Bridge with netlink interface + * + * Copyright (c) 2011 Volkswagen Group Electronic Research + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Volkswagen nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * The provided data structures and external interfaces from this code + * are not restricted to be used by modules with a GPL compatible license. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/rcupdate.h> +#include <linux/rculist.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/skbuff.h> +#include <linux/can.h> +#include <linux/can/core.h> +#include <linux/can/gw.h> +#include <net/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +#define CAN_GW_VERSION "20101209" +static __initdata const char banner[] = + KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n"; + +MODULE_DESCRIPTION("PF_CAN netlink gateway"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); +MODULE_ALIAS("can-gw"); + +HLIST_HEAD(cgw_list); +static struct notifier_block notifier; + +static struct kmem_cache *cgw_cache __read_mostly; + +/* structure that contains the (on-the-fly) CAN frame modifications */ +struct cf_mod { + struct { + struct can_frame and; + struct can_frame or; + struct can_frame xor; + struct can_frame set; + } modframe; + struct { + u8 and; + u8 or; + u8 xor; + u8 set; + } modtype; + void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf, + struct cf_mod *mod); + + /* CAN frame checksum calculation after CAN frame modifications */ + struct { + struct cgw_csum_xor xor; + struct cgw_csum_crc8 crc8; + } csum; + struct { + void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor); + void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8); + } csumfunc; +}; + + +/* + * So far we just support CAN -> CAN routing and frame modifications. + * + * The internal can_can_gw structure contains data and attributes for + * a CAN -> CAN gateway job. + */ +struct can_can_gw { + struct can_filter filter; + int src_idx; + int dst_idx; +}; + +/* list entry for CAN gateways jobs */ +struct cgw_job { + struct hlist_node list; + struct rcu_head rcu; + u32 handled_frames; + u32 dropped_frames; + struct cf_mod mod; + union { + /* CAN frame data source */ + struct net_device *dev; + } src; + union { + /* CAN frame data destination */ + struct net_device *dev; + } dst; + union { + struct can_can_gw ccgw; + /* tbc */ + }; + u8 gwtype; + u16 flags; +}; + +/* modification functions that are invoked in the hot path in can_can_gw_rcv */ + +#define MODFUNC(func, op) static void func(struct can_frame *cf, \ + struct cf_mod *mod) { op ; } + +MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id) +MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc) +MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data) +MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id) +MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc) +MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data) +MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id) +MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc) +MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data) +MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id) +MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc) +MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data) + +static inline void canframecpy(struct can_frame *dst, struct can_frame *src) +{ + /* + * Copy the struct members separately to ensure that no uninitialized + * data are copied in the 3 bytes hole of the struct. This is needed + * to make easy compares of the data in the struct cf_mod. + */ + + dst->can_id = src->can_id; + dst->can_dlc = src->can_dlc; + *(u64 *)dst->data = *(u64 *)src->data; +} + +static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re) +{ + /* + * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0] + * relative to received dlc -1 .. -8 : + * e.g. for received dlc = 8 + * -1 => index = 7 (data[7]) + * -3 => index = 5 (data[5]) + * -8 => index = 0 (data[0]) + */ + + if (fr > -9 && fr < 8 && + to > -9 && to < 8 && + re > -9 && re < 8) + return 0; + else + return -EINVAL; +} + +static inline int calc_idx(int idx, int rx_dlc) +{ + if (idx < 0) + return rx_dlc + idx; + else + return idx; +} + +static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor) +{ + int from = calc_idx(xor->from_idx, cf->can_dlc); + int to = calc_idx(xor->to_idx, cf->can_dlc); + int res = calc_idx(xor->result_idx, cf->can_dlc); + u8 val = xor->init_xor_val; + int i; + + if (from < 0 || to < 0 || res < 0) + return; + + if (from <= to) { + for (i = from; i <= to; i++) + val ^= cf->data[i]; + } else { + for (i = from; i >= to; i--) + val ^= cf->data[i]; + } + + cf->data[res] = val; +} + +static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor) +{ + u8 val = xor->init_xor_val; + int i; + + for (i = xor->from_idx; i <= xor->to_idx; i++) + val ^= cf->data[i]; + + cf->data[xor->result_idx] = val; +} + +static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor) +{ + u8 val = xor->init_xor_val; + int i; + + for (i = xor->from_idx; i >= xor->to_idx; i--) + val ^= cf->data[i]; + + cf->data[xor->result_idx] = val; +} + +static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +{ + int from = calc_idx(crc8->from_idx, cf->can_dlc); + int to = calc_idx(crc8->to_idx, cf->can_dlc); + int res = calc_idx(crc8->result_idx, cf->can_dlc); + u8 crc = crc8->init_crc_val; + int i; + + if (from < 0 || to < 0 || res < 0) + return; + + if (from <= to) { + for (i = crc8->from_idx; i <= crc8->to_idx; i++) + crc = crc8->crctab[crc^cf->data[i]]; + } else { + for (i = crc8->from_idx; i >= crc8->to_idx; i--) + crc = crc8->crctab[crc^cf->data[i]]; + } + + switch (crc8->profile) { + + case CGW_CRC8PRF_1U8: + crc = crc8->crctab[crc^crc8->profile_data[0]]; + break; + + case CGW_CRC8PRF_16U8: + crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + break; + + case CGW_CRC8PRF_SFFID_XOR: + crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + (cf->can_id >> 8 & 0xFF)]; + break; + + } + + cf->data[crc8->result_idx] = crc^crc8->final_xor_val; +} + +static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +{ + u8 crc = crc8->init_crc_val; + int i; + + for (i = crc8->from_idx; i <= crc8->to_idx; i++) + crc = crc8->crctab[crc^cf->data[i]]; + + switch (crc8->profile) { + + case CGW_CRC8PRF_1U8: + crc = crc8->crctab[crc^crc8->profile_data[0]]; + break; + + case CGW_CRC8PRF_16U8: + crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + break; + + case CGW_CRC8PRF_SFFID_XOR: + crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + (cf->can_id >> 8 & 0xFF)]; + break; + } + + cf->data[crc8->result_idx] = crc^crc8->final_xor_val; +} + +static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +{ + u8 crc = crc8->init_crc_val; + int i; + + for (i = crc8->from_idx; i >= crc8->to_idx; i--) + crc = crc8->crctab[crc^cf->data[i]]; + + switch (crc8->profile) { + + case CGW_CRC8PRF_1U8: + crc = crc8->crctab[crc^crc8->profile_data[0]]; + break; + + case CGW_CRC8PRF_16U8: + crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + break; + + case CGW_CRC8PRF_SFFID_XOR: + crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + (cf->can_id >> 8 & 0xFF)]; + break; + } + + cf->data[crc8->result_idx] = crc^crc8->final_xor_val; +} + +/* the receive & process & send function */ +static void can_can_gw_rcv(struct sk_buff *skb, void *data) +{ + struct cgw_job *gwj = (struct cgw_job *)data; + struct can_frame *cf; + struct sk_buff *nskb; + int modidx = 0; + + /* do not handle already routed frames - see comment below */ + if (skb_mac_header_was_set(skb)) + return; + + if (!(gwj->dst.dev->flags & IFF_UP)) { + gwj->dropped_frames++; + return; + } + + /* + * clone the given skb, which has not been done in can_rcv() + * + * When there is at least one modification function activated, + * we need to copy the skb as we want to modify skb->data. + */ + if (gwj->mod.modfunc[0]) + nskb = skb_copy(skb, GFP_ATOMIC); + else + nskb = skb_clone(skb, GFP_ATOMIC); + + if (!nskb) { + gwj->dropped_frames++; + return; + } + + /* + * Mark routed frames by setting some mac header length which is + * not relevant for the CAN frames located in the skb->data section. + * + * As dev->header_ops is not set in CAN netdevices no one is ever + * accessing the various header offsets in the CAN skbuffs anyway. + * E.g. using the packet socket to read CAN frames is still working. + */ + skb_set_mac_header(nskb, 8); + nskb->dev = gwj->dst.dev; + + /* pointer to modifiable CAN frame */ + cf = (struct can_frame *)nskb->data; + + /* perform preprocessed modification functions if there are any */ + while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) + (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + + /* check for checksum updates when the CAN frame has been modified */ + if (modidx) { + if (gwj->mod.csumfunc.crc8) + (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + + if (gwj->mod.csumfunc.xor) + (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + } + + /* clear the skb timestamp if not configured the other way */ + if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP)) + nskb->tstamp.tv64 = 0; + + /* send to netdevice */ + if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO)) + gwj->dropped_frames++; + else + gwj->handled_frames++; +} + +static inline int cgw_register_filter(struct cgw_job *gwj) +{ + return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, + gwj->ccgw.filter.can_mask, can_can_gw_rcv, + gwj, "gw"); +} + +static inline void cgw_unregister_filter(struct cgw_job *gwj) +{ + can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id, + gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj); +} + +static int cgw_notifier(struct notifier_block *nb, + unsigned long msg, void *data) +{ + struct net_device *dev = (struct net_device *)data; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + if (dev->type != ARPHRD_CAN) + return NOTIFY_DONE; + + if (msg == NETDEV_UNREGISTER) { + + struct cgw_job *gwj = NULL; + struct hlist_node *n, *nx; + + ASSERT_RTNL(); + + hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + + if (gwj->src.dev == dev || gwj->dst.dev == dev) { + hlist_del(&gwj->list); + cgw_unregister_filter(gwj); + kfree(gwj); + } + } + } + + return NOTIFY_DONE; +} + +static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj) +{ + struct cgw_frame_mod mb; + struct rtcanmsg *rtcan; + struct nlmsghdr *nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*rtcan), 0); + if (!nlh) + return -EMSGSIZE; + + rtcan = nlmsg_data(nlh); + rtcan->can_family = AF_CAN; + rtcan->gwtype = gwj->gwtype; + rtcan->flags = gwj->flags; + + /* add statistics if available */ + + if (gwj->handled_frames) { + if (nla_put_u32(skb, CGW_HANDLED, gwj->handled_frames) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + } + + if (gwj->dropped_frames) { + if (nla_put_u32(skb, CGW_DROPPED, gwj->dropped_frames) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + } + + /* check non default settings of attributes */ + + if (gwj->mod.modtype.and) { + memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.and; + if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.modtype.or) { + memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.or; + if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.modtype.xor) { + memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.xor; + if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.modtype.set) { + memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.set; + if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.csumfunc.crc8) { + if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, + &gwj->mod.csum.crc8) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + \ + NLA_ALIGN(CGW_CS_CRC8_LEN); + } + + if (gwj->mod.csumfunc.xor) { + if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, + &gwj->mod.csum.xor) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + \ + NLA_ALIGN(CGW_CS_XOR_LEN); + } + + if (gwj->gwtype == CGW_TYPE_CAN_CAN) { + + if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) { + if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter), + &gwj->ccgw.filter) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + + NLA_ALIGN(sizeof(struct can_filter)); + } + + if (nla_put_u32(skb, CGW_SRC_IF, gwj->ccgw.src_idx) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + + if (nla_put_u32(skb, CGW_DST_IF, gwj->ccgw.dst_idx) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + } + + return skb->len; + +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */ +static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct cgw_job *gwj = NULL; + struct hlist_node *n; + int idx = 0; + int s_idx = cb->args[0]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) { + if (idx < s_idx) + goto cont; + + if (cgw_put_job(skb, gwj) < 0) + break; +cont: + idx++; + } + rcu_read_unlock(); + + cb->args[0] = idx; + + return skb->len; +} + +/* check for common and gwtype specific attributes */ +static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, + u8 gwtype, void *gwtypeattr) +{ + struct nlattr *tb[CGW_MAX+1]; + struct cgw_frame_mod mb; + int modidx = 0; + int err = 0; + + /* initialize modification & checksum data space */ + memset(mod, 0, sizeof(*mod)); + + err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, NULL); + if (err < 0) + return err; + + /* check for AND/OR/XOR/SET modifications */ + + if (tb[CGW_MOD_AND] && + nla_len(tb[CGW_MOD_AND]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.and, &mb.cf); + mod->modtype.and = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_and_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_and_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_and_data; + } + + if (tb[CGW_MOD_OR] && + nla_len(tb[CGW_MOD_OR]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.or, &mb.cf); + mod->modtype.or = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_or_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_or_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_or_data; + } + + if (tb[CGW_MOD_XOR] && + nla_len(tb[CGW_MOD_XOR]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.xor, &mb.cf); + mod->modtype.xor = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_xor_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_xor_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_xor_data; + } + + if (tb[CGW_MOD_SET] && + nla_len(tb[CGW_MOD_SET]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.set, &mb.cf); + mod->modtype.set = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_set_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_set_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_set_data; + } + + /* check for checksum operations after CAN frame modifications */ + if (modidx) { + + if (tb[CGW_CS_CRC8] && + nla_len(tb[CGW_CS_CRC8]) == CGW_CS_CRC8_LEN) { + + struct cgw_csum_crc8 *c = (struct cgw_csum_crc8 *)\ + nla_data(tb[CGW_CS_CRC8]); + + err = cgw_chk_csum_parms(c->from_idx, c->to_idx, + c->result_idx); + if (err) + return err; + + nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8], + CGW_CS_CRC8_LEN); + + /* + * select dedicated processing function to reduce + * runtime operations in receive hot path. + */ + if (c->from_idx < 0 || c->to_idx < 0 || + c->result_idx < 0) + mod->csumfunc.crc8 = cgw_csum_crc8_rel; + else if (c->from_idx <= c->to_idx) + mod->csumfunc.crc8 = cgw_csum_crc8_pos; + else + mod->csumfunc.crc8 = cgw_csum_crc8_neg; + } + + if (tb[CGW_CS_XOR] && + nla_len(tb[CGW_CS_XOR]) == CGW_CS_XOR_LEN) { + + struct cgw_csum_xor *c = (struct cgw_csum_xor *)\ + nla_data(tb[CGW_CS_XOR]); + + err = cgw_chk_csum_parms(c->from_idx, c->to_idx, + c->result_idx); + if (err) + return err; + + nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR], + CGW_CS_XOR_LEN); + + /* + * select dedicated processing function to reduce + * runtime operations in receive hot path. + */ + if (c->from_idx < 0 || c->to_idx < 0 || + c->result_idx < 0) + mod->csumfunc.xor = cgw_csum_xor_rel; + else if (c->from_idx <= c->to_idx) + mod->csumfunc.xor = cgw_csum_xor_pos; + else + mod->csumfunc.xor = cgw_csum_xor_neg; + } + } + + if (gwtype == CGW_TYPE_CAN_CAN) { + + /* check CGW_TYPE_CAN_CAN specific attributes */ + + struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr; + memset(ccgw, 0, sizeof(*ccgw)); + + /* check for can_filter in attributes */ + if (tb[CGW_FILTER] && + nla_len(tb[CGW_FILTER]) == sizeof(struct can_filter)) + nla_memcpy(&ccgw->filter, tb[CGW_FILTER], + sizeof(struct can_filter)); + + err = -ENODEV; + + /* specifying two interfaces is mandatory */ + if (!tb[CGW_SRC_IF] || !tb[CGW_DST_IF]) + return err; + + if (nla_len(tb[CGW_SRC_IF]) == sizeof(u32)) + nla_memcpy(&ccgw->src_idx, tb[CGW_SRC_IF], + sizeof(u32)); + + if (nla_len(tb[CGW_DST_IF]) == sizeof(u32)) + nla_memcpy(&ccgw->dst_idx, tb[CGW_DST_IF], + sizeof(u32)); + + /* both indices set to 0 for flushing all routing entries */ + if (!ccgw->src_idx && !ccgw->dst_idx) + return 0; + + /* only one index set to 0 is an error */ + if (!ccgw->src_idx || !ccgw->dst_idx) + return err; + } + + /* add the checks for other gwtypes here */ + + return 0; +} + +static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct rtcanmsg *r; + struct cgw_job *gwj; + int err = 0; + + if (nlmsg_len(nlh) < sizeof(*r)) + return -EINVAL; + + r = nlmsg_data(nlh); + if (r->can_family != AF_CAN) + return -EPFNOSUPPORT; + + /* so far we only support CAN -> CAN routings */ + if (r->gwtype != CGW_TYPE_CAN_CAN) + return -EINVAL; + + gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); + if (!gwj) + return -ENOMEM; + + gwj->handled_frames = 0; + gwj->dropped_frames = 0; + gwj->flags = r->flags; + gwj->gwtype = r->gwtype; + + err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw); + if (err < 0) + goto out; + + err = -ENODEV; + + /* ifindex == 0 is not allowed for job creation */ + if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx) + goto out; + + gwj->src.dev = dev_get_by_index(&init_net, gwj->ccgw.src_idx); + + if (!gwj->src.dev) + goto out; + + /* check for CAN netdev not using header_ops - see gw_rcv() */ + if (gwj->src.dev->type != ARPHRD_CAN || gwj->src.dev->header_ops) + goto put_src_out; + + gwj->dst.dev = dev_get_by_index(&init_net, gwj->ccgw.dst_idx); + + if (!gwj->dst.dev) + goto put_src_out; + + /* check for CAN netdev not using header_ops - see gw_rcv() */ + if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops) + goto put_src_dst_out; + + ASSERT_RTNL(); + + err = cgw_register_filter(gwj); + if (!err) + hlist_add_head_rcu(&gwj->list, &cgw_list); + +put_src_dst_out: + dev_put(gwj->dst.dev); +put_src_out: + dev_put(gwj->src.dev); +out: + if (err) + kmem_cache_free(cgw_cache, gwj); + + return err; +} + +static void cgw_remove_all_jobs(void) +{ + struct cgw_job *gwj = NULL; + struct hlist_node *n, *nx; + + ASSERT_RTNL(); + + hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_del(&gwj->list); + cgw_unregister_filter(gwj); + kfree(gwj); + } +} + +static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct cgw_job *gwj = NULL; + struct hlist_node *n, *nx; + struct rtcanmsg *r; + struct cf_mod mod; + struct can_can_gw ccgw; + int err = 0; + + if (nlmsg_len(nlh) < sizeof(*r)) + return -EINVAL; + + r = nlmsg_data(nlh); + if (r->can_family != AF_CAN) + return -EPFNOSUPPORT; + + /* so far we only support CAN -> CAN routings */ + if (r->gwtype != CGW_TYPE_CAN_CAN) + return -EINVAL; + + err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw); + if (err < 0) + return err; + + /* two interface indices both set to 0 => remove all entries */ + if (!ccgw.src_idx && !ccgw.dst_idx) { + cgw_remove_all_jobs(); + return 0; + } + + err = -EINVAL; + + ASSERT_RTNL(); + + /* remove only the first matching entry */ + hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + + if (gwj->flags != r->flags) + continue; + + if (memcmp(&gwj->mod, &mod, sizeof(mod))) + continue; + + /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */ + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) + continue; + + hlist_del(&gwj->list); + cgw_unregister_filter(gwj); + kfree(gwj); + err = 0; + break; + } + + return err; +} + +static __init int cgw_module_init(void) +{ + printk(banner); + + cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), + 0, 0, NULL); + + if (!cgw_cache) + return -ENOMEM; + + /* set notifier */ + notifier.notifier_call = cgw_notifier; + register_netdevice_notifier(¬ifier); + + if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) { + unregister_netdevice_notifier(¬ifier); + kmem_cache_destroy(cgw_cache); + return -ENOBUFS; + } + + /* Only the first call to __rtnl_register can fail */ + __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL); + __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL); + + return 0; +} + +static __exit void cgw_module_exit(void) +{ + rtnl_unregister_all(PF_CAN); + + unregister_netdevice_notifier(¬ifier); + + rtnl_lock(); + cgw_remove_all_jobs(); + rtnl_unlock(); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ + + kmem_cache_destroy(cgw_cache); +} + +module_init(cgw_module_init); +module_exit(cgw_module_exit); diff --git a/net/can/proc.c b/net/can/proc.c index 0016f7339699..ba873c36d2fd 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -37,8 +37,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <socketcan-users@lists.berlios.de> - * */ #include <linux/module.h> diff --git a/net/can/raw.c b/net/can/raw.c index dea99a6e596c..cde1b4a20f75 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -37,8 +37,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * - * Send feedback to <socketcan-users@lists.berlios.de> - * */ #include <linux/module.h> diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index be683f2d401f..cc04dd667a10 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -27,3 +27,17 @@ config CEPH_LIB_PRETTYDEBUG If unsure, say N. +config CEPH_LIB_USE_DNS_RESOLVER + bool "Use in-kernel support for DNS lookup" + depends on CEPH_LIB + select DNS_RESOLVER + default n + help + If you say Y here, hostnames (e.g. monitor addresses) will + be resolved using the CONFIG_DNS_RESOLVER facility. + + For information on how to use CONFIG_DNS_RESOLVER consult + Documentation/networking/dns_resolver.txt + + If unsure, say N. + diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 2883ea01e680..97f70e50ad3b 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -432,9 +432,12 @@ EXPORT_SYMBOL(ceph_client_id); /* * create a fresh client instance */ -struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) +struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, + unsigned supported_features, + unsigned required_features) { struct ceph_client *client; + struct ceph_entity_addr *myaddr = NULL; int err = -ENOMEM; client = kzalloc(sizeof(*client), GFP_KERNEL); @@ -449,15 +452,27 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) client->auth_err = 0; client->extra_mon_dispatch = NULL; - client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT; - client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT; - - client->msgr = NULL; + client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT | + supported_features; + client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT | + required_features; + + /* msgr */ + if (ceph_test_opt(client, MYIP)) + myaddr = &client->options->my_addr; + client->msgr = ceph_messenger_create(myaddr, + client->supported_features, + client->required_features); + if (IS_ERR(client->msgr)) { + err = PTR_ERR(client->msgr); + goto fail; + } + client->msgr->nocrc = ceph_test_opt(client, NOCRC); /* subsystems */ err = ceph_monc_init(&client->monc, client); if (err < 0) - goto fail; + goto fail_msgr; err = ceph_osdc_init(&client->osdc, client); if (err < 0) goto fail_monc; @@ -466,6 +481,8 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) fail_monc: ceph_monc_stop(&client->monc); +fail_msgr: + ceph_messenger_destroy(client->msgr); fail: kfree(client); return ERR_PTR(err); @@ -490,8 +507,7 @@ void ceph_destroy_client(struct ceph_client *client) ceph_debugfs_client_cleanup(client); - if (client->msgr) - ceph_messenger_destroy(client->msgr); + ceph_messenger_destroy(client->msgr); ceph_destroy_options(client->options); @@ -514,24 +530,9 @@ static int have_mon_and_osd_map(struct ceph_client *client) */ int __ceph_open_session(struct ceph_client *client, unsigned long started) { - struct ceph_entity_addr *myaddr = NULL; int err; unsigned long timeout = client->options->mount_timeout * HZ; - /* initialize the messenger */ - if (client->msgr == NULL) { - if (ceph_test_opt(client, MYIP)) - myaddr = &client->options->my_addr; - client->msgr = ceph_messenger_create(myaddr, - client->supported_features, - client->required_features); - if (IS_ERR(client->msgr)) { - client->msgr = NULL; - return PTR_ERR(client->msgr); - } - client->msgr->nocrc = ceph_test_opt(client, NOCRC); - } - /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); if (err < 0) diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 42599e31dcad..b79747c4b645 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -477,7 +477,6 @@ int crush_do_rule(struct crush_map *map, int i, j; int numrep; int firstn; - int rc = -1; BUG_ON(ruleno >= map->max_rules); @@ -491,23 +490,18 @@ int crush_do_rule(struct crush_map *map, * that this may or may not correspond to the specific types * referenced by the crush rule. */ - if (force >= 0) { - if (force >= map->max_devices || - map->device_parents[force] == 0) { - /*dprintk("CRUSH: forcefed device dne\n");*/ - rc = -1; /* force fed device dne */ - goto out; - } - if (!is_out(map, weight, force, x)) { - while (1) { - force_context[++force_pos] = force; - if (force >= 0) - force = map->device_parents[force]; - else - force = map->bucket_parents[-1-force]; - if (force == 0) - break; - } + if (force >= 0 && + force < map->max_devices && + map->device_parents[force] != 0 && + !is_out(map, weight, force, x)) { + while (1) { + force_context[++force_pos] = force; + if (force >= 0) + force = map->device_parents[force]; + else + force = map->bucket_parents[-1-force]; + if (force == 0) + break; } } @@ -516,10 +510,15 @@ int crush_do_rule(struct crush_map *map, switch (rule->steps[step].op) { case CRUSH_RULE_TAKE: w[0] = rule->steps[step].arg1; - if (force_pos >= 0) { - BUG_ON(force_context[force_pos] != w[0]); + + /* find position in force_context/hierarchy */ + while (force_pos >= 0 && + force_context[force_pos] != w[0]) force_pos--; - } + /* and move past it */ + if (force_pos >= 0) + force_pos--; + wsize = 1; break; @@ -600,10 +599,7 @@ int crush_do_rule(struct crush_map *map, BUG_ON(1); } } - rc = result_len; - -out: - return rc; + return result_len; } diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 85f3bc0a7062..b780cb7947dd 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -15,10 +15,9 @@ int ceph_crypto_key_clone(struct ceph_crypto_key *dst, const struct ceph_crypto_key *src) { memcpy(dst, src, sizeof(struct ceph_crypto_key)); - dst->key = kmalloc(src->len, GFP_NOFS); + dst->key = kmemdup(src->key, src->len, GFP_NOFS); if (!dst->key) return -ENOMEM; - memcpy(dst->key, src->key, src->len); return 0; } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 9918e9eb276e..ad5b70801f37 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -11,12 +11,14 @@ #include <linux/string.h> #include <linux/bio.h> #include <linux/blkdev.h> +#include <linux/dns_resolver.h> #include <net/tcp.h> #include <linux/ceph/libceph.h> #include <linux/ceph/messenger.h> #include <linux/ceph/decode.h> #include <linux/ceph/pagelist.h> +#include <linux/export.h> /* * Ceph uses the messenger to exchange ceph_msg messages with other @@ -1078,6 +1080,101 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) } /* + * Unlike other *_pton function semantics, zero indicates success. + */ +static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, + char delim, const char **ipend) +{ + struct sockaddr_in *in4 = (void *)ss; + struct sockaddr_in6 *in6 = (void *)ss; + + memset(ss, 0, sizeof(*ss)); + + if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) { + ss->ss_family = AF_INET; + return 0; + } + + if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) { + ss->ss_family = AF_INET6; + return 0; + } + + return -EINVAL; +} + +/* + * Extract hostname string and resolve using kernel DNS facility. + */ +#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER +static int ceph_dns_resolve_name(const char *name, size_t namelen, + struct sockaddr_storage *ss, char delim, const char **ipend) +{ + const char *end, *delim_p; + char *colon_p, *ip_addr = NULL; + int ip_len, ret; + + /* + * The end of the hostname occurs immediately preceding the delimiter or + * the port marker (':') where the delimiter takes precedence. + */ + delim_p = memchr(name, delim, namelen); + colon_p = memchr(name, ':', namelen); + + if (delim_p && colon_p) + end = delim_p < colon_p ? delim_p : colon_p; + else if (!delim_p && colon_p) + end = colon_p; + else { + end = delim_p; + if (!end) /* case: hostname:/ */ + end = name + namelen; + } + + if (end <= name) + return -EINVAL; + + /* do dns_resolve upcall */ + ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL); + if (ip_len > 0) + ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL); + else + ret = -ESRCH; + + kfree(ip_addr); + + *ipend = end; + + pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name, + ret, ret ? "failed" : ceph_pr_addr(ss)); + + return ret; +} +#else +static inline int ceph_dns_resolve_name(const char *name, size_t namelen, + struct sockaddr_storage *ss, char delim, const char **ipend) +{ + return -EINVAL; +} +#endif + +/* + * Parse a server name (IP or hostname). If a valid IP address is not found + * then try to extract a hostname to resolve using userspace DNS upcall. + */ +static int ceph_parse_server_name(const char *name, size_t namelen, + struct sockaddr_storage *ss, char delim, const char **ipend) +{ + int ret; + + ret = ceph_pton(name, namelen, ss, delim, ipend); + if (ret) + ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend); + + return ret; +} + +/* * Parse an ip[:port] list into an addr array. Use the default * monitor port if a port isn't specified. */ @@ -1085,15 +1182,13 @@ int ceph_parse_ips(const char *c, const char *end, struct ceph_entity_addr *addr, int max_count, int *count) { - int i; + int i, ret = -EINVAL; const char *p = c; dout("parse_ips on '%.*s'\n", (int)(end-c), c); for (i = 0; i < max_count; i++) { const char *ipend; struct sockaddr_storage *ss = &addr[i].in_addr; - struct sockaddr_in *in4 = (void *)ss; - struct sockaddr_in6 *in6 = (void *)ss; int port; char delim = ','; @@ -1102,15 +1197,11 @@ int ceph_parse_ips(const char *c, const char *end, p++; } - memset(ss, 0, sizeof(*ss)); - if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr, - delim, &ipend)) - ss->ss_family = AF_INET; - else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, - delim, &ipend)) - ss->ss_family = AF_INET6; - else + ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend); + if (ret) goto bad; + ret = -EINVAL; + p = ipend; if (delim == ']') { @@ -1155,7 +1246,7 @@ int ceph_parse_ips(const char *c, const char *end, bad: pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); - return -EINVAL; + return ret; } EXPORT_SYMBOL(ceph_parse_ips); @@ -2281,7 +2372,8 @@ EXPORT_SYMBOL(ceph_con_keepalive); * construct a new message with given type, size * the new msg has a ref count of 1. */ -struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) +struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, + bool can_fail) { struct ceph_msg *m; @@ -2333,7 +2425,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) m->front.iov_base = kmalloc(front_len, flags); } if (m->front.iov_base == NULL) { - pr_err("msg_new can't allocate %d bytes\n", + dout("ceph_msg_new can't allocate %d bytes\n", front_len); goto out2; } @@ -2348,7 +2440,14 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) out2: ceph_msg_put(m); out: - pr_err("msg_new can't create type %d front %d\n", type, front_len); + if (!can_fail) { + pr_err("msg_new can't create type %d front %d\n", type, + front_len); + WARN_ON(1); + } else { + dout("msg_new can't create type %d front %d\n", type, + front_len); + } return NULL; } EXPORT_SYMBOL(ceph_msg_new); @@ -2398,7 +2497,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, } if (!msg) { *skip = 0; - msg = ceph_msg_new(type, front_len, GFP_NOFS); + msg = ceph_msg_new(type, front_len, GFP_NOFS, false); if (!msg) { pr_err("unable to allocate msg type %d len %d\n", type, front_len); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index cbe31fa45508..0b62deae42bd 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -116,14 +116,12 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) */ static void __close_session(struct ceph_mon_client *monc) { - if (monc->con) { - dout("__close_session closing mon%d\n", monc->cur_mon); - ceph_con_revoke(monc->con, monc->m_auth); - ceph_con_close(monc->con); - monc->cur_mon = -1; - monc->pending_auth = 0; - ceph_auth_reset(monc->auth); - } + dout("__close_session closing mon%d\n", monc->cur_mon); + ceph_con_revoke(monc->con, monc->m_auth); + ceph_con_close(monc->con); + monc->cur_mon = -1; + monc->pending_auth = 0; + ceph_auth_reset(monc->auth); } /* @@ -302,15 +300,6 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) */ int ceph_monc_open_session(struct ceph_mon_client *monc) { - if (!monc->con) { - monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); - if (!monc->con) - return -ENOMEM; - ceph_con_init(monc->client->msgr, monc->con); - monc->con->private = monc; - monc->con->ops = &mon_con_ops; - } - mutex_lock(&monc->mutex); __open_session(monc); __schedule_delayed(monc); @@ -528,10 +517,12 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) init_completion(&req->completion); err = -ENOMEM; - req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS); + req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS, + true); if (!req->request) goto out; - req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS); + req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS, + true); if (!req->reply) goto out; @@ -626,10 +617,12 @@ int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op, init_completion(&req->completion); err = -ENOMEM; - req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS); + req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS, + true); if (!req->request) goto out; - req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS); + req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS, + true); if (!req->reply) goto out; @@ -755,13 +748,21 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) if (err) goto out; - monc->con = NULL; + /* connection */ + monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); + if (!monc->con) + goto out_monmap; + ceph_con_init(monc->client->msgr, monc->con); + monc->con->private = monc; + monc->con->ops = &mon_con_ops; /* authentication */ monc->auth = ceph_auth_init(cl->options->name, cl->options->key); - if (IS_ERR(monc->auth)) - return PTR_ERR(monc->auth); + if (IS_ERR(monc->auth)) { + err = PTR_ERR(monc->auth); + goto out_con; + } monc->auth->want_keys = CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; @@ -770,19 +771,21 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) err = -ENOMEM; monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, sizeof(struct ceph_mon_subscribe_ack), - GFP_NOFS); + GFP_NOFS, true); if (!monc->m_subscribe_ack) - goto out_monmap; + goto out_auth; - monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); + monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS, + true); if (!monc->m_subscribe) goto out_subscribe_ack; - monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS); + monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS, + true); if (!monc->m_auth_reply) goto out_subscribe; - monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS); + monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS, true); monc->pending_auth = 0; if (!monc->m_auth) goto out_auth_reply; @@ -808,6 +811,10 @@ out_subscribe: ceph_msg_put(monc->m_subscribe); out_subscribe_ack: ceph_msg_put(monc->m_subscribe_ack); +out_auth: + ceph_auth_destroy(monc->auth); +out_con: + monc->con->ops->put(monc->con); out_monmap: kfree(monc->monmap); out: @@ -822,11 +829,11 @@ void ceph_monc_stop(struct ceph_mon_client *monc) mutex_lock(&monc->mutex); __close_session(monc); - if (monc->con) { - monc->con->private = NULL; - monc->con->ops->put(monc->con); - monc->con = NULL; - } + + monc->con->private = NULL; + monc->con->ops->put(monc->con); + monc->con = NULL; + mutex_unlock(&monc->mutex); ceph_auth_destroy(monc->auth); @@ -973,7 +980,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_MON_MAP: case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: - m = ceph_msg_new(type, front_len, GFP_NOFS); + m = ceph_msg_new(type, front_len, GFP_NOFS, false); break; } @@ -1000,7 +1007,7 @@ static void mon_fault(struct ceph_connection *con) if (!con->private) goto out; - if (monc->con && !monc->hunting) + if (!monc->hunting) pr_info("mon%d %s session lost, " "hunting for new mon\n", monc->cur_mon, ceph_pr_addr(&monc->con->peer_addr.in_addr)); diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index 1f4cb30a42c5..11d5f4196a73 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg) struct ceph_msgpool *pool = arg; struct ceph_msg *msg; - msg = ceph_msg_new(0, pool->front_len, gfp_mask); + msg = ceph_msg_new(0, pool->front_len, gfp_mask, true); if (!msg) { dout("msgpool_alloc %s failed\n", pool->name); } else { @@ -61,7 +61,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, WARN_ON(1); /* try to alloc a fresh message */ - return ceph_msg_new(0, front_len, GFP_NOFS); + return ceph_msg_new(0, front_len, GFP_NOFS, false); } msg = mempool_alloc(pool->pool, GFP_NOFS); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 88ad8a2501b5..5e254055c910 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -29,8 +29,8 @@ static void __register_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); static void __unregister_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static int __send_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req); +static void __send_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); static int op_needs_trail(int op) { @@ -227,7 +227,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); else msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, - OSD_OPREPLY_FRONT_LEN, gfp_flags); + OSD_OPREPLY_FRONT_LEN, gfp_flags, true); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -244,13 +244,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, ceph_pagelist_init(req->r_trail); } /* create request message; allow space for oid */ - msg_size += 40; + msg_size += MAX_OBJ_NAME_SIZE; if (snapc) msg_size += sizeof(u64) * snapc->num_snaps; if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else - msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags); + msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp_flags, true); if (!msg) { ceph_osdc_put_request(req); return NULL; @@ -943,7 +943,7 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); * Caller should hold map_sem for read and request_mutex. */ static int __map_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) + struct ceph_osd_request *req, int force_resend) { struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; @@ -967,7 +967,8 @@ static int __map_request(struct ceph_osd_client *osdc, num = err; } - if ((req->r_osd && req->r_osd->o_osd == o && + if ((!force_resend && + req->r_osd && req->r_osd->o_osd == o && req->r_sent >= req->r_osd->o_incarnation && req->r_num_pg_osds == num && memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || @@ -1021,8 +1022,8 @@ out: /* * caller should hold map_sem (for read) and request_mutex */ -static int __send_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) +static void __send_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) { struct ceph_osd_request_head *reqhead; @@ -1040,7 +1041,6 @@ static int __send_request(struct ceph_osd_client *osdc, ceph_msg_get(req->r_request); /* send consumes a ref */ ceph_con_send(&req->r_osd->o_con, req->r_request); req->r_sent = req->r_osd->o_incarnation; - return 0; } /* @@ -1289,18 +1289,18 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) * * Caller should hold map_sem for read and request_mutex. */ -static void kick_requests(struct ceph_osd_client *osdc) +static void kick_requests(struct ceph_osd_client *osdc, int force_resend) { struct ceph_osd_request *req, *nreq; struct rb_node *p; int needmap = 0; int err; - dout("kick_requests\n"); + dout("kick_requests %s\n", force_resend ? " (force resend)" : ""); mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { req = rb_entry(p, struct ceph_osd_request, r_node); - err = __map_request(osdc, req); + err = __map_request(osdc, req, force_resend); if (err < 0) continue; /* error */ if (req->r_osd == NULL) { @@ -1318,7 +1318,7 @@ static void kick_requests(struct ceph_osd_client *osdc) r_linger_item) { dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); - err = __map_request(osdc, req); + err = __map_request(osdc, req, force_resend); if (err == 0) continue; /* no change and no osd was specified */ if (err < 0) @@ -1395,7 +1395,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = newmap; } - kick_requests(osdc); + kick_requests(osdc, 0); reset_changed_osds(osdc); } else { dout("ignoring incremental map %u len %d\n", @@ -1423,6 +1423,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) "older than our %u\n", epoch, maplen, osdc->osdmap->epoch); } else { + int skipped_map = 0; + dout("taking full map %u len %d\n", epoch, maplen); newmap = osdmap_decode(&p, p+maplen); if (IS_ERR(newmap)) { @@ -1432,9 +1434,12 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) BUG_ON(!newmap); oldmap = osdc->osdmap; osdc->osdmap = newmap; - if (oldmap) + if (oldmap) { + if (oldmap->epoch + 1 < newmap->epoch) + skipped_map = 1; ceph_osdmap_destroy(oldmap); - kick_requests(osdc); + } + kick_requests(osdc, skipped_map); } p += maplen; nr_maps--; @@ -1707,7 +1712,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, * the request still han't been touched yet. */ if (req->r_sent == 0) { - rc = __map_request(osdc, req); + rc = __map_request(osdc, req, 0); if (rc < 0) { if (nofail) { dout("osdc_start_request failed map, " @@ -1720,17 +1725,9 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, dout("send_request %p no up osds in pg\n", req); ceph_monc_request_next_osdmap(&osdc->client->monc); } else { - rc = __send_request(osdc, req); - if (rc) { - if (nofail) { - dout("osdc_start_request failed send, " - " will retry %lld\n", req->r_tid); - rc = 0; - } else { - __unregister_request(osdc, req); - } - } + __send_request(osdc, req); } + rc = 0; } out_unlock: @@ -2032,7 +2029,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (front > req->r_reply->front.iov_len) { pr_warning("get_reply front %d > preallocated %d\n", front, (int)req->r_reply->front.iov_len); - m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS); + m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS, false); if (!m) goto out; ceph_msg_put(req->r_reply); @@ -2080,7 +2077,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, switch (type) { case CEPH_MSG_OSD_MAP: case CEPH_MSG_WATCH_NOTIFY: - return ceph_msg_new(type, front, GFP_NOFS); + return ceph_msg_new(type, front, GFP_NOFS, false); case CEPH_MSG_OSD_OPREPLY: return get_reply(con, hdr, skip); default: diff --git a/net/compat.c b/net/compat.c index c578d9382e19..6def90e0a112 100644 --- a/net/compat.c +++ b/net/compat.c @@ -22,6 +22,7 @@ #include <linux/filter.h> #include <linux/compat.h> #include <linux/security.h> +#include <linux/export.h> #include <net/scm.h> #include <net/sock.h> diff --git a/net/core/Makefile b/net/core/Makefile index 0d357b1c4e57..674641b13aea 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,12 +3,13 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o secure_seq.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ - neighbour.o rtnetlink.o utils.o link_watch.o filter.o + neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ + sock_diag.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o @@ -19,3 +20,4 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o +obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 18ac112ea7ae..68bbf9f65cb0 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -324,15 +324,15 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -410,15 +410,15 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -500,15 +500,15 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -585,16 +585,16 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { __wsum csum2; int err = 0; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; diff --git a/net/core/dev.c b/net/core/dev.c index b10ff0a71855..f494675471a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -133,6 +133,9 @@ #include <linux/pci.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> +#include <linux/net_tstamp.h> +#include <linux/jump_label.h> +#include <net/flow_keys.h> #include "net-sysfs.h" @@ -1316,8 +1319,6 @@ EXPORT_SYMBOL(dev_close); */ void dev_disable_lro(struct net_device *dev) { - u32 flags; - /* * If we're trying to disable lro on a vlan device * use the underlying physical device instead @@ -1325,15 +1326,9 @@ void dev_disable_lro(struct net_device *dev) if (is_vlan_dev(dev)) dev = vlan_dev_real_dev(dev); - if (dev->ethtool_ops && dev->ethtool_ops->get_flags) - flags = dev->ethtool_ops->get_flags(dev); - else - flags = ethtool_op_get_flags(dev); - - if (!(flags & ETH_FLAG_LRO)) - return; + dev->wanted_features &= ~NETIF_F_LRO; + netdev_update_features(dev); - __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO); if (unlikely(dev->features & NETIF_F_LRO)) netdev_WARN(dev, "failed to disable LRO!\n"); } @@ -1392,7 +1387,7 @@ rollback: for_each_net(net) { for_each_netdev(net, dev) { if (dev == last) - break; + goto outroll; if (dev->flags & IFF_UP) { nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); @@ -1403,6 +1398,7 @@ rollback: } } +outroll: raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } @@ -1445,33 +1441,105 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); -/* When > 0 there are consumers of rx skb time stamps */ -static atomic_t netstamp_needed = ATOMIC_INIT(0); +static struct jump_label_key netstamp_needed __read_mostly; +#ifdef HAVE_JUMP_LABEL +/* We are not allowed to call jump_label_dec() from irq context + * If net_disable_timestamp() is called from irq context, defer the + * jump_label_dec() calls. + */ +static atomic_t netstamp_needed_deferred; +#endif void net_enable_timestamp(void) { - atomic_inc(&netstamp_needed); +#ifdef HAVE_JUMP_LABEL + int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + + if (deferred) { + while (--deferred) + jump_label_dec(&netstamp_needed); + return; + } +#endif + WARN_ON(in_interrupt()); + jump_label_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { - atomic_dec(&netstamp_needed); +#ifdef HAVE_JUMP_LABEL + if (in_interrupt()) { + atomic_inc(&netstamp_needed_deferred); + return; + } +#endif + jump_label_dec(&netstamp_needed); } EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { - if (atomic_read(&netstamp_needed)) + skb->tstamp.tv64 = 0; + if (static_branch(&netstamp_needed)) __net_timestamp(skb); - else - skb->tstamp.tv64 = 0; } -static inline void net_timestamp_check(struct sk_buff *skb) +#define net_timestamp_check(COND, SKB) \ + if (static_branch(&netstamp_needed)) { \ + if ((COND) && !(SKB)->tstamp.tv64) \ + __net_timestamp(SKB); \ + } \ + +static int net_hwtstamp_validate(struct ifreq *ifr) { - if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed)) - __net_timestamp(skb); + struct hwtstamp_config cfg; + enum hwtstamp_tx_types tx_type; + enum hwtstamp_rx_filters rx_filter; + int tx_type_valid = 0; + int rx_filter_valid = 0; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + if (cfg.flags) /* reserved for future extensions */ + return -EINVAL; + + tx_type = cfg.tx_type; + rx_filter = cfg.rx_filter; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + case HWTSTAMP_TX_ONESTEP_SYNC: + tx_type_valid = 1; + break; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + rx_filter_valid = 1; + break; + } + + if (!tx_type_valid || !rx_filter_valid) + return -ERANGE; + + return 0; } static inline bool is_skb_forwardable(struct net_device *dev, @@ -1868,7 +1936,8 @@ EXPORT_SYMBOL(skb_checksum_help); * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) +struct sk_buff *skb_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; @@ -1898,9 +1967,9 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) dev->ethtool_ops->get_drvinfo(dev, &info); - WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n", - info.driver, dev ? dev->features : 0L, - skb->sk ? skb->sk->sk_route_caps : 0L, + WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d ip_summed=%d\n", + info.driver, dev ? &dev->features : NULL, + skb->sk ? &skb->sk->sk_route_caps : NULL, skb->len, skb->data_len, skb->ip_summed); if (skb_header_cloned(skb) && @@ -1955,9 +2024,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #ifdef CONFIG_HIGHMEM int i; if (!(dev->features & NETIF_F_HIGHDMA)) { - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (PageHighMem(skb_shinfo(skb)->frags[i].page)) + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (PageHighMem(skb_frag_page(frag))) return 1; + } } if (PCI_DMA_BUS_IS_PHYS) { @@ -1966,7 +2037,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) if (!pdev) return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + dma_addr_t addr = page_to_phys(skb_frag_page(frag)); if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) return 1; } @@ -2006,7 +2078,7 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) * This function segments the given skb and stores the list of segments * in skb->next. */ -static int dev_gso_segment(struct sk_buff *skb, int features) +static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs; @@ -2045,7 +2117,7 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } -static bool can_checksum_protocol(unsigned long features, __be16 protocol) +static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { return ((features & NETIF_F_GEN_CSUM) || ((features & NETIF_F_V4_CSUM) && @@ -2056,7 +2128,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) protocol == htons(ETH_P_FCOE))); } -static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) +static netdev_features_t harmonize_features(struct sk_buff *skb, + __be16 protocol, netdev_features_t features) { if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; @@ -2068,10 +2141,10 @@ static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features return features; } -u32 netif_skb_features(struct sk_buff *skb) +netdev_features_t netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - u32 features = skb->dev->features; + netdev_features_t features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; @@ -2117,7 +2190,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, unsigned int skb_len; if (likely(!skb->next)) { - u32 features; + netdev_features_t features; /* * If device doesn't need skb->dst, release it right now while @@ -2198,7 +2271,7 @@ gso: return rc; } txq_trans_update(txq); - if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) + if (unlikely(netif_xmit_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -2398,6 +2471,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } +#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +static void skb_update_prio(struct sk_buff *skb) +{ + struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); + + if ((!skb->priority) && (skb->sk) && map) + skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx]; +} +#else +#define skb_update_prio(skb) +#endif + static DEFINE_PER_CPU(int, xmit_recursion); #define RECURSION_LIMIT 10 @@ -2438,6 +2523,8 @@ int dev_queue_xmit(struct sk_buff *skb) */ rcu_read_lock_bh(); + skb_update_prio(skb); + txq = dev_pick_tx(dev, skb); q = rcu_dereference_bh(txq->qdisc); @@ -2472,7 +2559,7 @@ int dev_queue_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, txq, cpu); - if (!netif_tx_queue_stopped(txq)) { + if (!netif_xmit_stopped(txq)) { __this_cpu_inc(xmit_recursion); rc = dev_hard_start_xmit(skb, dev, txq); __this_cpu_dec(xmit_recursion); @@ -2527,72 +2614,35 @@ static inline void ____napi_schedule(struct softnet_data *sd, /* * __skb_get_rxhash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Returns a non-zero hash number on success - * and 0 on failure. + * and src/dst port numbers. Sets rxhash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * if hash is a canonical 4-tuple hash over transport ports. */ -__u32 __skb_get_rxhash(struct sk_buff *skb) -{ - int nhoff, hash = 0, poff; - const struct ipv6hdr *ip6; - const struct iphdr *ip; - u8 ip_proto; - u32 addr1, addr2, ihl; - union { - u32 v32; - u16 v16[2]; - } ports; - - nhoff = skb_network_offset(skb); - - switch (skb->protocol) { - case __constant_htons(ETH_P_IP): - if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) - goto done; +void __skb_get_rxhash(struct sk_buff *skb) +{ + struct flow_keys keys; + u32 hash; - ip = (const struct iphdr *) (skb->data + nhoff); - if (ip_is_fragment(ip)) - ip_proto = 0; - else - ip_proto = ip->protocol; - addr1 = (__force u32) ip->saddr; - addr2 = (__force u32) ip->daddr; - ihl = ip->ihl; - break; - case __constant_htons(ETH_P_IPV6): - if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) - goto done; + if (!skb_flow_dissect(skb, &keys)) + return; - ip6 = (const struct ipv6hdr *) (skb->data + nhoff); - ip_proto = ip6->nexthdr; - addr1 = (__force u32) ip6->saddr.s6_addr32[3]; - addr2 = (__force u32) ip6->daddr.s6_addr32[3]; - ihl = (40 >> 2); - break; - default: - goto done; - } - - ports.v32 = 0; - poff = proto_ports_offset(ip_proto); - if (poff >= 0) { - nhoff += ihl * 4 + poff; - if (pskb_may_pull(skb, nhoff + 4)) { - ports.v32 = * (__force u32 *) (skb->data + nhoff); - if (ports.v16[1] < ports.v16[0]) - swap(ports.v16[0], ports.v16[1]); - } + if (keys.ports) { + if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]) + swap(keys.port16[0], keys.port16[1]); + skb->l4_rxhash = 1; } /* get a consistent hash (same value on both flow directions) */ - if (addr2 < addr1) - swap(addr1, addr2); + if ((__force u32)keys.dst < (__force u32)keys.src) + swap(keys.dst, keys.src); - hash = jhash_3words(addr1, addr2, ports.v32, hashrnd); + hash = jhash_3words((__force u32)keys.dst, + (__force u32)keys.src, + (__force u32)keys.ports, hashrnd); if (!hash) hash = 1; -done: - return hash; + skb->rxhash = hash; } EXPORT_SYMBOL(__skb_get_rxhash); @@ -2602,14 +2652,13 @@ EXPORT_SYMBOL(__skb_get_rxhash); struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +struct jump_label_key rps_needed __read_mostly; + static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { - u16 tcpu; - - tcpu = rflow->cpu = next_cpu; - if (tcpu != RPS_NO_CPU) { + if (next_cpu != RPS_NO_CPU) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; @@ -2637,16 +2686,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, goto out; old_rflow = rflow; rflow = &flow_table->flows[flow_id]; - rflow->cpu = next_cpu; rflow->filter = rc; if (old_rflow->filter == rflow->filter) old_rflow->filter = RPS_NO_FILTER; out: #endif rflow->last_qtail = - per_cpu(softnet_data, tcpu).input_queue_head; + per_cpu(softnet_data, next_cpu).input_queue_head; } + rflow->cpu = next_cpu; return rflow; } @@ -2681,13 +2730,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, map = rcu_dereference(rxqueue->rps_map); if (map) { if (map->len == 1 && - !rcu_dereference_raw(rxqueue->rps_flow_table)) { + !rcu_access_pointer(rxqueue->rps_flow_table)) { tcpu = map->cpus[0]; if (cpu_online(tcpu)) cpu = tcpu; goto done; } - } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) { + } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { goto done; } @@ -2884,12 +2933,11 @@ int netif_rx(struct sk_buff *skb) if (netpoll_rx(skb)) return NET_RX_DROP; - if (netdev_tstamp_prequeue) - net_timestamp_check(skb); + net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); #ifdef CONFIG_RPS - { + if (static_branch(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -2904,14 +2952,13 @@ int netif_rx(struct sk_buff *skb) rcu_read_unlock(); preempt_enable(); - } -#else + } else +#endif { unsigned int qtail; ret = enqueue_to_backlog(skb, get_cpu(), &qtail); put_cpu(); } -#endif return ret; } EXPORT_SYMBOL(netif_rx); @@ -3102,8 +3149,8 @@ void netdev_rx_handler_unregister(struct net_device *dev) { ASSERT_RTNL(); - rcu_assign_pointer(dev->rx_handler, NULL); - rcu_assign_pointer(dev->rx_handler_data, NULL); + RCU_INIT_POINTER(dev->rx_handler, NULL); + RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); @@ -3117,8 +3164,7 @@ static int __netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; - if (!netdev_tstamp_prequeue) - net_timestamp_check(skb); + net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); @@ -3171,6 +3217,17 @@ ncls: #endif rx_handler = rcu_dereference(skb->dev->rx_handler); + if (vlan_tx_tag_present(skb)) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + if (vlan_do_receive(&skb, !rx_handler)) + goto another_round; + else if (unlikely(!skb)) + goto out; + } + if (rx_handler) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); @@ -3190,18 +3247,6 @@ ncls: } } - if (vlan_tx_tag_present(skb)) { - if (pt_prev) { - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = NULL; - } - if (vlan_do_receive(&skb)) { - ret = __netif_receive_skb(skb); - goto out; - } else if (unlikely(!skb)) - goto out; - } - /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; @@ -3250,14 +3295,13 @@ out: */ int netif_receive_skb(struct sk_buff *skb) { - if (netdev_tstamp_prequeue) - net_timestamp_check(skb); + net_timestamp_check(netdev_tstamp_prequeue, skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; #ifdef CONFIG_RPS - { + if (static_branch(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu, ret; @@ -3268,16 +3312,12 @@ int netif_receive_skb(struct sk_buff *skb) if (cpu >= 0) { ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); - } else { - rcu_read_unlock(); - ret = __netif_receive_skb(skb); + return ret; } - - return ret; + rcu_read_unlock(); } -#else - return __netif_receive_skb(skb); #endif + return __netif_receive_skb(skb); } EXPORT_SYMBOL(netif_receive_skb); @@ -3429,10 +3469,10 @@ pull: skb->data_len -= grow; skb_shinfo(skb)->frags[0].page_offset += grow; - skb_shinfo(skb)->frags[0].size -= grow; + skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); - if (unlikely(!skb_shinfo(skb)->frags[0].size)) { - put_page(skb_shinfo(skb)->frags[0].page); + if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { + skb_frag_unref(skb, 0); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); @@ -3496,11 +3536,10 @@ void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0_len = 0; if (skb->mac_header == skb->tail && - !PageHighMem(skb_shinfo(skb)->frags[0].page)) { + !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { NAPI_GRO_CB(skb)->frag0 = - page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset; - NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; + skb_frag_address(&skb_shinfo(skb)->frags[0]); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]); } } EXPORT_SYMBOL(skb_gro_reset_offset); @@ -3982,6 +4021,60 @@ static int dev_ifconf(struct net *net, char __user *arg) } #ifdef CONFIG_PROC_FS + +#define BUCKET_SPACE (32 - NETDEV_HASHBITS) + +struct dev_iter_state { + struct seq_net_private p; + unsigned int pos; /* bucket << BUCKET_SPACE + offset */ +}; + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq) +{ + struct dev_iter_state *state = seq->private; + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_node *p; + struct hlist_head *h; + unsigned int count, bucket, offset; + + bucket = get_bucket(state->pos); + offset = get_offset(state->pos); + h = &net->dev_name_head[bucket]; + count = 0; + hlist_for_each_entry_rcu(dev, p, h, name_hlist) { + if (count++ == offset) { + state->pos = set_bucket_offset(bucket, count); + return dev; + } + } + + return NULL; +} + +static inline struct net_device *dev_from_new_bucket(struct seq_file *seq) +{ + struct dev_iter_state *state = seq->private; + struct net_device *dev; + unsigned int bucket; + + bucket = get_bucket(state->pos); + do { + dev = dev_from_same_bucket(seq); + if (dev) + return dev; + + bucket++; + state->pos = set_bucket_offset(bucket, 0); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +} + /* * This is invoked by the /proc filesystem handler to display a device * in detail. @@ -3989,33 +4082,33 @@ static int dev_ifconf(struct net *net, char __user *arg) void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { - struct net *net = seq_file_net(seq); - loff_t off; - struct net_device *dev; + struct dev_iter_state *state = seq->private; rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; - off = 1; - for_each_netdev_rcu(net, dev) - if (off++ == *pos) - return dev; + /* check for end of the hash */ + if (state->pos == 0 && *pos > 1) + return NULL; - return NULL; + return dev_from_new_bucket(seq); } void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = v; + struct net_device *dev; + + ++*pos; if (v == SEQ_START_TOKEN) - dev = first_net_device_rcu(seq_file_net(seq)); - else - dev = next_net_device_rcu(dev); + return dev_from_new_bucket(seq); - ++*pos; - return dev; + dev = dev_from_same_bucket(seq); + if (dev) + return dev; + + return dev_from_new_bucket(seq); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -4114,7 +4207,13 @@ static const struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); + sizeof(struct dev_iter_state)); +} + +int dev_seq_open_ops(struct inode *inode, struct file *file, + const struct seq_operations *ops) +{ + return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state)); } static const struct file_operations dev_seq_fops = { @@ -4367,7 +4466,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) static int __dev_set_promiscuity(struct net_device *dev, int inc) { - unsigned short old_flags = dev->flags; + unsigned int old_flags = dev->flags; uid_t uid; gid_t gid; @@ -4424,7 +4523,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) */ int dev_set_promiscuity(struct net_device *dev, int inc) { - unsigned short old_flags = dev->flags; + unsigned int old_flags = dev->flags; int err; err = __dev_set_promiscuity(dev, inc); @@ -4451,7 +4550,7 @@ EXPORT_SYMBOL(dev_set_promiscuity); int dev_set_allmulti(struct net_device *dev, int inc) { - unsigned short old_flags = dev->flags; + unsigned int old_flags = dev->flags; ASSERT_RTNL(); @@ -4497,9 +4596,7 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (ops->ndo_set_rx_mode) - ops->ndo_set_rx_mode(dev); - else { + if (!(dev->priv_flags & IFF_UNICAST_FLT)) { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ @@ -4510,10 +4607,10 @@ void __dev_set_rx_mode(struct net_device *dev) __dev_set_promiscuity(dev, -1); dev->uc_promisc = false; } - - if (ops->ndo_set_multicast_list) - ops->ndo_set_multicast_list(dev); } + + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); } void dev_set_rx_mode(struct net_device *dev) @@ -4524,30 +4621,6 @@ void dev_set_rx_mode(struct net_device *dev) } /** - * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() - * @dev: device - * @cmd: memory area for ethtool_ops::get_settings() result - * - * The cmd arg is initialized properly (cleared and - * ethtool_cmd::cmd field set to ETHTOOL_GSET). - * - * Return device's ethtool_ops::get_settings() result value or - * -EOPNOTSUPP when device doesn't expose - * ethtool_ops::get_settings() operation. - */ -int dev_ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - memset(cmd, 0, sizeof(struct ethtool_cmd)); - cmd->cmd = ETHTOOL_GSET; - return dev->ethtool_ops->get_settings(dev, cmd); -} -EXPORT_SYMBOL(dev_ethtool_get_settings); - -/** * dev_get_flags - get flags reported to userspace * @dev: device * @@ -4580,7 +4653,7 @@ EXPORT_SYMBOL(dev_get_flags); int __dev_change_flags(struct net_device *dev, unsigned int flags) { - int old_flags = dev->flags; + unsigned int old_flags = dev->flags; int ret; ASSERT_RTNL(); @@ -4663,10 +4736,10 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) * Change settings on device based state flags. The flags are * in the userspace exported format. */ -int dev_change_flags(struct net_device *dev, unsigned flags) +int dev_change_flags(struct net_device *dev, unsigned int flags) { - int ret, changes; - int old_flags = dev->flags; + int ret; + unsigned int changes, old_flags = dev->flags; ret = __dev_change_flags(dev, flags); if (ret < 0) @@ -4863,7 +4936,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -4871,7 +4944,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); case SIOCDELMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -4888,6 +4961,12 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); + case SIOCSHWTSTAMP: + err = net_hwtstamp_validate(ifr); + if (err) + return err; + /* fall through */ + /* * Unknown or private ioctl */ @@ -5202,7 +5281,7 @@ static void rollback_registered_many(struct list_head *head) dev = list_first_entry(head, struct net_device, unreg_list); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - rcu_barrier(); + synchronize_net(); list_for_each_entry(dev, head, unreg_list) dev_put(dev); @@ -5217,7 +5296,8 @@ static void rollback_registered(struct net_device *dev) list_del(&single); } -static u32 netdev_fix_features(struct net_device *dev, u32 features) +static netdev_features_t netdev_fix_features(struct net_device *dev, + netdev_features_t features) { /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && @@ -5226,12 +5306,6 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features) features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } - if ((features & NETIF_F_NO_CSUM) && - (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_warn(dev, "mixed no checksumming and other settings.\n"); - features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); - } - /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { @@ -5279,7 +5353,7 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features) int __netdev_update_features(struct net_device *dev) { - u32 features; + netdev_features_t features; int err = 0; ASSERT_RTNL(); @@ -5295,16 +5369,16 @@ int __netdev_update_features(struct net_device *dev) if (dev->features == features) return 0; - netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n", - dev->features, features); + netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", + &dev->features, &features); if (dev->netdev_ops->ndo_set_features) err = dev->netdev_ops->ndo_set_features(dev, features); if (unlikely(err < 0)) { netdev_err(dev, - "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", - err, features, dev->features); + "set_features() failed (%d); wanted %pNF, left %pNF\n", + err, &features, &dev->features); return -1; } @@ -5403,6 +5477,9 @@ static void netdev_init_one_queue(struct net_device *dev, queue->xmit_lock_owner = -1; netdev_queue_numa_node_write(queue, NUMA_NO_NODE); queue->dev = dev; +#ifdef CONFIG_BQL + dql_init(&queue->dql, HZ); +#endif } static int netif_alloc_netdev_queues(struct net_device *dev) @@ -5488,11 +5565,12 @@ int register_netdevice(struct net_device *dev) dev->wanted_features = dev->features & dev->hw_features; /* Turn on no cache copy if HW is doing checksum */ - dev->hw_features |= NETIF_F_NOCACHE_COPY; - if ((dev->features & NETIF_F_ALL_CSUM) && - !(dev->features & NETIF_F_NO_CSUM)) { - dev->wanted_features |= NETIF_F_NOCACHE_COPY; - dev->features |= NETIF_F_NOCACHE_COPY; + if (!(dev->flags & IFF_LOOPBACK)) { + dev->hw_features |= NETIF_F_NOCACHE_COPY; + if (dev->features & NETIF_F_ALL_CSUM) { + dev->wanted_features |= NETIF_F_NOCACHE_COPY; + dev->features |= NETIF_F_NOCACHE_COPY; + } } /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. @@ -5715,6 +5793,12 @@ void netdev_run_todo(void) __rtnl_unlock(); + /* Wait for rcu callbacks to finish before attempting to drain + * the device list. This usually avoids a 250ms wait. + */ + if (!list_empty(&list)) + rcu_barrier(); + while (!list_empty(&list)) { struct net_device *dev = list_first_entry(&list, struct net_device, todo_list); @@ -5735,8 +5819,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); - WARN_ON(rcu_dereference_raw(dev->ip_ptr)); - WARN_ON(rcu_dereference_raw(dev->ip6_ptr)); + WARN_ON(rcu_access_pointer(dev->ip_ptr)); + WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); if (dev->destructor) @@ -5940,7 +6024,7 @@ void free_netdev(struct net_device *dev) kfree(dev->_rx); #endif - kfree(rcu_dereference_raw(dev->ingress_queue)); + kfree(rcu_dereference_protected(dev->ingress_queue, 1)); /* Flush device addresses */ dev_addr_flush(dev); @@ -6115,6 +6199,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* * Flush the unicast and multicast chains @@ -6221,7 +6306,8 @@ static int dev_cpu_callback(struct notifier_block *nfb, * @one to the master device with current feature set @all. Will not * enable anything that is off in @mask. Returns the new feature set. */ -u32 netdev_increment_features(u32 all, u32 one, u32 mask) +netdev_features_t netdev_increment_features(netdev_features_t all, + netdev_features_t one, netdev_features_t mask) { if (mask & NETIF_F_GEN_CSUM) mask |= NETIF_F_ALL_CSUM; @@ -6230,10 +6316,6 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask) all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; all &= one | ~NETIF_F_ALL_FOR_ALL; - /* If device needs checksumming, downgrade to it. */ - if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM)) - all &= ~NETIF_F_NO_CSUM; - /* If one device supports hw checksumming, set for all. */ if (all & NETIF_F_GEN_CSUM) all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); @@ -6298,7 +6380,7 @@ const char *netdev_drivername(const struct net_device *dev) return empty; } -static int __netdev_printk(const char *level, const struct net_device *dev, +int __netdev_printk(const char *level, const struct net_device *dev, struct va_format *vaf) { int r; @@ -6313,6 +6395,7 @@ static int __netdev_printk(const char *level, const struct net_device *dev, return r; } +EXPORT_SYMBOL(__netdev_printk); int netdev_printk(const char *level, const struct net_device *dev, const char *format, ...) diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index e2e66939ed00..29c07fef9228 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -13,6 +13,7 @@ #include <linux/netdevice.h> #include <linux/rtnetlink.h> +#include <linux/export.h> #include <linux/list.h> #include <linux/proc_fs.h> @@ -426,7 +427,7 @@ EXPORT_SYMBOL(dev_uc_del); * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be - * locked by netif_tx_lock_bh. + * locked by netif_addr_lock_bh. * * This function is intended to be called from the dev->set_rx_mode * function of layered software devices. @@ -438,11 +439,11 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_bh(to); + netif_addr_lock_nested(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); - netif_addr_unlock_bh(to); + netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_uc_sync); @@ -462,7 +463,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) return; netif_addr_lock_bh(from); - netif_addr_lock(to); + netif_addr_lock_nested(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); @@ -589,10 +590,10 @@ EXPORT_SYMBOL(dev_mc_del_global); * * Add newly added addresses to the destination device and release * addresses that have no users left. The source device must be - * locked by netif_tx_lock_bh. + * locked by netif_addr_lock_bh. * - * This function is intended to be called from the dev->set_multicast_list - * or dev->set_rx_mode function of layered software devices. + * This function is intended to be called from the ndo_set_rx_mode + * function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { @@ -601,11 +602,11 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_bh(to); + netif_addr_lock_nested(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); - netif_addr_unlock_bh(to); + netif_addr_unlock(to); return err; } EXPORT_SYMBOL(dev_mc_sync); @@ -625,7 +626,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) return; netif_addr_lock_bh(from); - netif_addr_lock(to); + netif_addr_lock_nested(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); @@ -695,8 +696,7 @@ static const struct seq_operations dev_mc_seq_ops = { static int dev_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &dev_mc_seq_ops, - sizeof(struct seq_net_private)); + return dev_seq_open_ops(inode, file, &dev_mc_seq_ops); } static const struct file_operations dev_mc_seq_fops = { diff --git a/net/core/dst.c b/net/core/dst.c index 14b33baf0733..43d94cedbf7c 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -229,11 +229,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) smp_rmb(); again: - neigh = dst->_neighbour; + neigh = rcu_dereference_protected(dst->_neighbour, 1); child = dst->child; if (neigh) { - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); neigh_release(neigh); } @@ -360,14 +360,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { + struct neighbour *neigh; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->_neighbour && dst->_neighbour->dev == dev) { - dst->_neighbour->dev = dst->dev; + rcu_read_lock(); + neigh = dst_get_neighbour_noref(dst); + if (neigh && neigh->dev == dev) { + neigh->dev = dst->dev; dev_hold(dst->dev); dev_put(dev); } + rcu_read_unlock(); } } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6cdba5fc2bed..921aa2b4b415 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -36,235 +36,44 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); -u32 ethtool_op_get_tx_csum(struct net_device *dev) -{ - return (dev->features & NETIF_F_ALL_CSUM) != 0; -} -EXPORT_SYMBOL(ethtool_op_get_tx_csum); - -int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) -{ - if (data) - dev->features |= NETIF_F_IP_CSUM; - else - dev->features &= ~NETIF_F_IP_CSUM; - - return 0; -} -EXPORT_SYMBOL(ethtool_op_set_tx_csum); - -int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) -{ - if (data) - dev->features |= NETIF_F_HW_CSUM; - else - dev->features &= ~NETIF_F_HW_CSUM; - - return 0; -} -EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); - -int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) -{ - if (data) - dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - else - dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - - return 0; -} -EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); - -u32 ethtool_op_get_sg(struct net_device *dev) -{ - return (dev->features & NETIF_F_SG) != 0; -} -EXPORT_SYMBOL(ethtool_op_get_sg); - -int ethtool_op_set_sg(struct net_device *dev, u32 data) -{ - if (data) - dev->features |= NETIF_F_SG; - else - dev->features &= ~NETIF_F_SG; - - return 0; -} -EXPORT_SYMBOL(ethtool_op_set_sg); - -u32 ethtool_op_get_tso(struct net_device *dev) -{ - return (dev->features & NETIF_F_TSO) != 0; -} -EXPORT_SYMBOL(ethtool_op_get_tso); - -int ethtool_op_set_tso(struct net_device *dev, u32 data) -{ - if (data) - dev->features |= NETIF_F_TSO; - else - dev->features &= ~NETIF_F_TSO; - - return 0; -} -EXPORT_SYMBOL(ethtool_op_set_tso); - -u32 ethtool_op_get_ufo(struct net_device *dev) -{ - return (dev->features & NETIF_F_UFO) != 0; -} -EXPORT_SYMBOL(ethtool_op_get_ufo); - -int ethtool_op_set_ufo(struct net_device *dev, u32 data) -{ - if (data) - dev->features |= NETIF_F_UFO; - else - dev->features &= ~NETIF_F_UFO; - return 0; -} -EXPORT_SYMBOL(ethtool_op_set_ufo); - -/* the following list of flags are the same as their associated - * NETIF_F_xxx values in include/linux/netdevice.h - */ -static const u32 flags_dup_features = - (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE | - ETH_FLAG_RXHASH); - -u32 ethtool_op_get_flags(struct net_device *dev) -{ - /* in the future, this function will probably contain additional - * handling for flags which are not so easily handled - * by a simple masking operation - */ - - return dev->features & flags_dup_features; -} -EXPORT_SYMBOL(ethtool_op_get_flags); - -/* Check if device can enable (or disable) particular feature coded in "data" - * argument. Flags "supported" describe features that can be toggled by device. - * If feature can not be toggled, it state (enabled or disabled) must match - * hardcoded device features state, otherwise flags are marked as invalid. - */ -bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported) -{ - u32 features = dev->features & flags_dup_features; - /* "data" can contain only flags_dup_features bits, - * see __ethtool_set_flags */ - - return (features & ~supported) != (data & ~supported); -} -EXPORT_SYMBOL(ethtool_invalid_flags); - -int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) -{ - if (ethtool_invalid_flags(dev, data, supported)) - return -EINVAL; - - dev->features = ((dev->features & ~flags_dup_features) | - (data & flags_dup_features)); - return 0; -} -EXPORT_SYMBOL(ethtool_op_set_flags); - /* Handlers for each ethtool command */ -#define ETHTOOL_DEV_FEATURE_WORDS 1 - -static void ethtool_get_features_compat(struct net_device *dev, - struct ethtool_get_features_block *features) -{ - if (!dev->ethtool_ops) - return; - - /* getting RX checksum */ - if (dev->ethtool_ops->get_rx_csum) - if (dev->ethtool_ops->get_rx_csum(dev)) - features[0].active |= NETIF_F_RXCSUM; - - /* mark legacy-changeable features */ - if (dev->ethtool_ops->set_sg) - features[0].available |= NETIF_F_SG; - if (dev->ethtool_ops->set_tx_csum) - features[0].available |= NETIF_F_ALL_CSUM; - if (dev->ethtool_ops->set_tso) - features[0].available |= NETIF_F_ALL_TSO; - if (dev->ethtool_ops->set_rx_csum) - features[0].available |= NETIF_F_RXCSUM; - if (dev->ethtool_ops->set_flags) - features[0].available |= flags_dup_features; -} - -static int ethtool_set_feature_compat(struct net_device *dev, - int (*legacy_set)(struct net_device *, u32), - struct ethtool_set_features_block *features, u32 mask) -{ - u32 do_set; - - if (!legacy_set) - return 0; - - if (!(features[0].valid & mask)) - return 0; - - features[0].valid &= ~mask; - - do_set = !!(features[0].requested & mask); - - if (legacy_set(dev, do_set) < 0) - netdev_info(dev, - "Legacy feature change (%s) failed for 0x%08x\n", - do_set ? "set" : "clear", mask); - - return 1; -} - -static int ethtool_set_flags_compat(struct net_device *dev, - int (*legacy_set)(struct net_device *, u32), - struct ethtool_set_features_block *features, u32 mask) -{ - u32 value; - - if (!legacy_set) - return 0; - - if (!(features[0].valid & mask)) - return 0; - - value = dev->features & ~features[0].valid; - value |= features[0].requested; - - features[0].valid &= ~mask; - - if (legacy_set(dev, value & mask) < 0) - netdev_info(dev, "Legacy flags change failed\n"); - - return 1; -} - -static int ethtool_set_features_compat(struct net_device *dev, - struct ethtool_set_features_block *features) -{ - int compat; - - if (!dev->ethtool_ops) - return 0; - - compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg, - features, NETIF_F_SG); - compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum, - features, NETIF_F_ALL_CSUM); - compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso, - features, NETIF_F_ALL_TSO); - compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum, - features, NETIF_F_RXCSUM); - compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags, - features, flags_dup_features); - - return compat; -} +#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32) + +static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { + [NETIF_F_SG_BIT] = "tx-scatter-gather", + [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4", + [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic", + [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", + [NETIF_F_HIGHDMA_BIT] = "highdma", + [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", + [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert", + + [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse", + [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter", + [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", + [NETIF_F_GSO_BIT] = "tx-generic-segmentation", + [NETIF_F_LLTX_BIT] = "tx-lockless", + [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", + [NETIF_F_GRO_BIT] = "rx-gro", + [NETIF_F_LRO_BIT] = "rx-lro", + + [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", + [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", + [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", + [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", + [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", + [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", + + [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", + [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", + [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", + [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", + [NETIF_F_RXHASH_BIT] = "rx-hashing", + [NETIF_F_RXCSUM_BIT] = "rx-checksum", + [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy", + [NETIF_F_LOOPBACK_BIT] = "loopback", +}; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { @@ -272,18 +81,21 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr) .cmd = ETHTOOL_GFEATURES, .size = ETHTOOL_DEV_FEATURE_WORDS, }; - struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = { - { - .available = dev->hw_features, - .requested = dev->wanted_features, - .active = dev->features, - .never_changed = NETIF_F_NEVER_CHANGE, - }, - }; + struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; u32 __user *sizeaddr; u32 copy_size; + int i; + + /* in case feature bits run out again */ + BUILD_BUG_ON(ETHTOOL_DEV_FEATURE_WORDS * sizeof(u32) > sizeof(netdev_features_t)); - ethtool_get_features_compat(dev, features); + for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) { + features[i].available = (u32)(dev->hw_features >> (32 * i)); + features[i].requested = (u32)(dev->wanted_features >> (32 * i)); + features[i].active = (u32)(dev->features >> (32 * i)); + features[i].never_changed = + (u32)(NETIF_F_NEVER_CHANGE >> (32 * i)); + } sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); if (get_user(copy_size, sizeaddr)) @@ -305,7 +117,8 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) { struct ethtool_sfeatures cmd; struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; - int ret = 0; + netdev_features_t wanted = 0, valid = 0; + int i, ret = 0; if (copy_from_user(&cmd, useraddr, sizeof(cmd))) return -EFAULT; @@ -317,65 +130,29 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) if (copy_from_user(features, useraddr, sizeof(features))) return -EFAULT; - if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) - return -EINVAL; + for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; ++i) { + valid |= (netdev_features_t)features[i].valid << (32 * i); + wanted |= (netdev_features_t)features[i].requested << (32 * i); + } - if (ethtool_set_features_compat(dev, features)) - ret |= ETHTOOL_F_COMPAT; + if (valid & ~NETIF_F_ETHTOOL_BITS) + return -EINVAL; - if (features[0].valid & ~dev->hw_features) { - features[0].valid &= dev->hw_features; + if (valid & ~dev->hw_features) { + valid &= dev->hw_features; ret |= ETHTOOL_F_UNSUPPORTED; } - dev->wanted_features &= ~features[0].valid; - dev->wanted_features |= features[0].valid & features[0].requested; + dev->wanted_features &= ~valid; + dev->wanted_features |= wanted & valid; __netdev_update_features(dev); - if ((dev->wanted_features ^ dev->features) & features[0].valid) + if ((dev->wanted_features ^ dev->features) & valid) ret |= ETHTOOL_F_WISH; return ret; } -static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = { - /* NETIF_F_SG */ "tx-scatter-gather", - /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4", - /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded", - /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic", - /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6", - /* NETIF_F_HIGHDMA */ "highdma", - /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist", - /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert", - - /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse", - /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter", - /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged", - /* NETIF_F_GSO */ "tx-generic-segmentation", - /* NETIF_F_LLTX */ "tx-lockless", - /* NETIF_F_NETNS_LOCAL */ "netns-local", - /* NETIF_F_GRO */ "rx-gro", - /* NETIF_F_LRO */ "rx-lro", - - /* NETIF_F_TSO */ "tx-tcp-segmentation", - /* NETIF_F_UFO */ "tx-udp-fragmentation", - /* NETIF_F_GSO_ROBUST */ "tx-gso-robust", - /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation", - /* NETIF_F_TSO6 */ "tx-tcp6-segmentation", - /* NETIF_F_FSO */ "tx-fcoe-segmentation", - "", - "", - - /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc", - /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp", - /* NETIF_F_FCOE_MTU */ "fcoe-mtu", - /* NETIF_F_NTUPLE */ "rx-ntuple-filter", - /* NETIF_F_RXHASH */ "rx-hashing", - /* NETIF_F_RXCSUM */ "rx-checksum", - /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy", - /* NETIF_F_LOOPBACK */ "loopback", -}; - static int __ethtool_get_sset_count(struct net_device *dev, int sset) { const struct ethtool_ops *ops = dev->ethtool_ops; @@ -402,7 +179,7 @@ static void __ethtool_get_strings(struct net_device *dev, ops->get_strings(dev, stringset, data); } -static u32 ethtool_get_feature_mask(u32 eth_cmd) +static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) { /* feature masks of legacy discrete ethtool ops */ @@ -433,151 +210,107 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd) } } -static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops) - return NULL; - - switch (ethcmd) { - case ETHTOOL_GTXCSUM: - return ops->get_tx_csum; - case ETHTOOL_GRXCSUM: - return ops->get_rx_csum; - case ETHTOOL_SSG: - return ops->get_sg; - case ETHTOOL_STSO: - return ops->get_tso; - case ETHTOOL_SUFO: - return ops->get_ufo; - default: - return NULL; - } -} - -static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev) -{ - return !!(dev->features & NETIF_F_ALL_CSUM); -} - static int ethtool_get_one_feature(struct net_device *dev, char __user *useraddr, u32 ethcmd) { - u32 mask = ethtool_get_feature_mask(ethcmd); + netdev_features_t mask = ethtool_get_feature_mask(ethcmd); struct ethtool_value edata = { .cmd = ethcmd, .data = !!(dev->features & mask), }; - /* compatibility with discrete get_ ops */ - if (!(dev->hw_features & mask)) { - u32 (*actor)(struct net_device *); - - actor = __ethtool_get_one_feature_actor(dev, ethcmd); - - /* bug compatibility with old get_rx_csum */ - if (ethcmd == ETHTOOL_GRXCSUM && !actor) - actor = __ethtool_get_rx_csum_oldbug; - - if (actor) - edata.data = actor(dev); - } - if (copy_to_user(useraddr, &edata, sizeof(edata))) return -EFAULT; return 0; } -static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); -static int __ethtool_set_rx_csum(struct net_device *dev, u32 data); -static int __ethtool_set_sg(struct net_device *dev, u32 data); -static int __ethtool_set_tso(struct net_device *dev, u32 data); -static int __ethtool_set_ufo(struct net_device *dev, u32 data); - static int ethtool_set_one_feature(struct net_device *dev, void __user *useraddr, u32 ethcmd) { struct ethtool_value edata; - u32 mask; + netdev_features_t mask; if (copy_from_user(&edata, useraddr, sizeof(edata))) return -EFAULT; mask = ethtool_get_feature_mask(ethcmd); mask &= dev->hw_features; - if (mask) { - if (edata.data) - dev->wanted_features |= mask; - else - dev->wanted_features &= ~mask; + if (!mask) + return -EOPNOTSUPP; - __netdev_update_features(dev); - return 0; - } + if (edata.data) + dev->wanted_features |= mask; + else + dev->wanted_features &= ~mask; - /* Driver is not converted to ndo_fix_features or does not - * support changing this offload. In the latter case it won't - * have corresponding ethtool_ops field set. - * - * Following part is to be removed after all drivers advertise - * their changeable features in netdev->hw_features and stop - * using discrete offload setting ops. - */ + __netdev_update_features(dev); - switch (ethcmd) { - case ETHTOOL_STXCSUM: - return __ethtool_set_tx_csum(dev, edata.data); - case ETHTOOL_SRXCSUM: - return __ethtool_set_rx_csum(dev, edata.data); - case ETHTOOL_SSG: - return __ethtool_set_sg(dev, edata.data); - case ETHTOOL_STSO: - return __ethtool_set_tso(dev, edata.data); - case ETHTOOL_SUFO: - return __ethtool_set_ufo(dev, edata.data); - default: - return -EOPNOTSUPP; - } + return 0; } -int __ethtool_set_flags(struct net_device *dev, u32 data) +#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ + ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) +#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \ + NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH) + +static u32 __ethtool_get_flags(struct net_device *dev) { - u32 changed; + u32 flags = 0; + + if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; + if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN; + if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN; + if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; + if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; - if (data & ~flags_dup_features) + return flags; +} + +static int __ethtool_set_flags(struct net_device *dev, u32 data) +{ + netdev_features_t features = 0, changed; + + if (data & ~ETH_ALL_FLAGS) return -EINVAL; - /* legacy set_flags() op */ - if (dev->ethtool_ops->set_flags) { - if (unlikely(dev->hw_features & flags_dup_features)) - netdev_warn(dev, - "driver BUG: mixed hw_features and set_flags()\n"); - return dev->ethtool_ops->set_flags(dev, data); - } + if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; + if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX; + if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX; + if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; + if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; /* allow changing only bits set in hw_features */ - changed = (data ^ dev->features) & flags_dup_features; + changed = (features ^ dev->features) & ETH_ALL_FEATURES; if (changed & ~dev->hw_features) return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; dev->wanted_features = - (dev->wanted_features & ~changed) | (data & dev->hw_features); + (dev->wanted_features & ~changed) | (features & changed); __netdev_update_features(dev); return 0; } -static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; - int err; + ASSERT_RTNL(); - if (!dev->ethtool_ops->get_settings) + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) return -EOPNOTSUPP; - err = dev->ethtool_ops->get_settings(dev, &cmd); + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(__ethtool_get_settings); + +static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +{ + int err; + struct ethtool_cmd cmd; + + err = __ethtool_get_settings(dev, &cmd); if (err < 0) return err; @@ -706,6 +439,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, { struct ethtool_rxnfc info; size_t info_size = sizeof(info); + int rc; if (!dev->ethtool_ops->set_rxnfc) return -EOPNOTSUPP; @@ -721,7 +455,15 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; - return dev->ethtool_ops->set_rxnfc(dev, &info); + rc = dev->ethtool_ops->set_rxnfc(dev, &info); + if (rc) + return rc; + + if (cmd == ETHTOOL_SRXCLSRLINS && + copy_to_user(useraddr, &info, info_size)) + return -EFAULT; + + return 0; } static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, @@ -782,34 +524,44 @@ err_out: static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { - struct ethtool_rxfh_indir *indir; - u32 table_size; - size_t full_size; + u32 user_size, dev_size; + u32 *indir; int ret; - if (!dev->ethtool_ops->get_rxfh_indir) + if (!dev->ethtool_ops->get_rxfh_indir_size || + !dev->ethtool_ops->get_rxfh_indir) + return -EOPNOTSUPP; + dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + if (dev_size == 0) return -EOPNOTSUPP; - if (copy_from_user(&table_size, + if (copy_from_user(&user_size, useraddr + offsetof(struct ethtool_rxfh_indir, size), - sizeof(table_size))) + sizeof(user_size))) return -EFAULT; - if (table_size > - (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) - return -ENOMEM; - full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; - indir = kzalloc(full_size, GFP_USER); + if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size), + &dev_size, sizeof(dev_size))) + return -EFAULT; + + /* If the user buffer size is 0, this is just a query for the + * device table size. Otherwise, if it's smaller than the + * device table size it's an error. + */ + if (user_size < dev_size) + return user_size == 0 ? 0 : -EINVAL; + + indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); if (!indir) return -ENOMEM; - indir->cmd = ETHTOOL_GRXFHINDIR; - indir->size = table_size; ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); if (ret) goto out; - if (copy_to_user(useraddr, indir, full_size)) + if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh_indir, ring_index[0]), + indir, dev_size * sizeof(indir[0]))) ret = -EFAULT; out: @@ -820,30 +572,56 @@ out: static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, void __user *useraddr) { - struct ethtool_rxfh_indir *indir; - u32 table_size; - size_t full_size; + struct ethtool_rxnfc rx_rings; + u32 user_size, dev_size, i; + u32 *indir; int ret; - if (!dev->ethtool_ops->set_rxfh_indir) + if (!dev->ethtool_ops->get_rxfh_indir_size || + !dev->ethtool_ops->set_rxfh_indir || + !dev->ethtool_ops->get_rxnfc) + return -EOPNOTSUPP; + dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + if (dev_size == 0) return -EOPNOTSUPP; - if (copy_from_user(&table_size, + if (copy_from_user(&user_size, useraddr + offsetof(struct ethtool_rxfh_indir, size), - sizeof(table_size))) + sizeof(user_size))) return -EFAULT; - if (table_size > - (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) - return -ENOMEM; - full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; - indir = kmalloc(full_size, GFP_USER); + if (user_size != 0 && user_size != dev_size) + return -EINVAL; + + indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); if (!indir) return -ENOMEM; - if (copy_from_user(indir, useraddr, full_size)) { - ret = -EFAULT; + rx_rings.cmd = ETHTOOL_GRXRINGS; + ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL); + if (ret) goto out; + + if (user_size == 0) { + for (i = 0; i < dev_size; i++) + indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); + } else { + if (copy_from_user(indir, + useraddr + + offsetof(struct ethtool_rxfh_indir, + ring_index[0]), + dev_size * sizeof(indir[0]))) { + ret = -EFAULT; + goto out; + } + + /* Validate ring indices */ + for (i = 0; i < dev_size; i++) { + if (indir[i] >= rx_rings.data) { + ret = -EINVAL; + goto out; + } + } } ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); @@ -853,58 +631,6 @@ out: return ret; } -/* - * ethtool does not (or did not) set masks for flow parameters that are - * not specified, so if both value and mask are 0 then this must be - * treated as equivalent to a mask with all bits set. Implement that - * here rather than in drivers. - */ -static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs) -{ - struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec; - struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec; - - if (fs->flow_type != TCP_V4_FLOW && - fs->flow_type != UDP_V4_FLOW && - fs->flow_type != SCTP_V4_FLOW) - return; - - if (!(entry->ip4src | mask->ip4src)) - mask->ip4src = htonl(0xffffffff); - if (!(entry->ip4dst | mask->ip4dst)) - mask->ip4dst = htonl(0xffffffff); - if (!(entry->psrc | mask->psrc)) - mask->psrc = htons(0xffff); - if (!(entry->pdst | mask->pdst)) - mask->pdst = htons(0xffff); - if (!(entry->tos | mask->tos)) - mask->tos = 0xff; - if (!(fs->vlan_tag | fs->vlan_tag_mask)) - fs->vlan_tag_mask = 0xffff; - if (!(fs->data | fs->data_mask)) - fs->data_mask = 0xffffffffffffffffULL; -} - -static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, - void __user *useraddr) -{ - struct ethtool_rx_ntuple cmd; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->set_rx_ntuple) - return -EOPNOTSUPP; - - if (!(dev->features & NETIF_F_NTUPLE)) - return -EINVAL; - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - - rx_ntuple_fix_masks(&cmd.fs); - - return ops->set_rx_ntuple(dev, &cmd); -} - static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) { struct ethtool_regs regs; @@ -1221,81 +947,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); } -static int __ethtool_set_sg(struct net_device *dev, u32 data) -{ - int err; - - if (!dev->ethtool_ops->set_sg) - return -EOPNOTSUPP; - - if (data && !(dev->features & NETIF_F_ALL_CSUM)) - return -EINVAL; - - if (!data && dev->ethtool_ops->set_tso) { - err = dev->ethtool_ops->set_tso(dev, 0); - if (err) - return err; - } - - if (!data && dev->ethtool_ops->set_ufo) { - err = dev->ethtool_ops->set_ufo(dev, 0); - if (err) - return err; - } - return dev->ethtool_ops->set_sg(dev, data); -} - -static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) -{ - int err; - - if (!dev->ethtool_ops->set_tx_csum) - return -EOPNOTSUPP; - - if (!data && dev->ethtool_ops->set_sg) { - err = __ethtool_set_sg(dev, 0); - if (err) - return err; - } - - return dev->ethtool_ops->set_tx_csum(dev, data); -} - -static int __ethtool_set_rx_csum(struct net_device *dev, u32 data) -{ - if (!dev->ethtool_ops->set_rx_csum) - return -EOPNOTSUPP; - - if (!data) - dev->features &= ~NETIF_F_GRO; - - return dev->ethtool_ops->set_rx_csum(dev, data); -} - -static int __ethtool_set_tso(struct net_device *dev, u32 data) -{ - if (!dev->ethtool_ops->set_tso) - return -EOPNOTSUPP; - - if (data && !(dev->features & NETIF_F_SG)) - return -EINVAL; - - return dev->ethtool_ops->set_tso(dev, data); -} - -static int __ethtool_set_ufo(struct net_device *dev, u32 data) -{ - if (!dev->ethtool_ops->set_ufo) - return -EOPNOTSUPP; - if (data && !(dev->features & NETIF_F_SG)) - return -EINVAL; - if (data && !((dev->features & NETIF_F_GEN_CSUM) || - (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) - == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) - return -EINVAL; - return dev->ethtool_ops->set_ufo(dev, data); -} - static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; @@ -1761,9 +1412,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) break; case ETHTOOL_GFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_flags ? - dev->ethtool_ops->get_flags : - ethtool_op_get_flags)); + __ethtool_get_flags); break; case ETHTOOL_SFLAGS: rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); @@ -1794,9 +1443,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_RESET: rc = ethtool_reset(dev, useraddr); break; - case ETHTOOL_SRXNTUPLE: - rc = ethtool_set_rx_ntuple(dev, useraddr); - break; case ETHTOOL_GSSET_INFO: rc = ethtool_get_sset_info(dev, useraddr); break; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 27071ee2a4e1..c02e63c908da 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> +#include <linux/module.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/fib_rules.h> @@ -490,7 +491,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (ops->nr_goto_rules > 0) { list_for_each_entry(tmp, &ops->rules_list, list) { if (rtnl_dereference(tmp->ctarget) == rule) { - rcu_assign_pointer(tmp->ctarget, NULL); + RCU_INIT_POINTER(tmp->ctarget, NULL); ops->unresolved_rules++; } } @@ -548,7 +549,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->flags = rule->flags; if (rule->action == FR_ACT_GOTO && - rcu_dereference_raw(rule->ctarget) == NULL) + rcu_access_pointer(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { diff --git a/net/core/filter.c b/net/core/filter.c index 36f975fa87cb..5dea45279215 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -436,7 +436,7 @@ error: * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int sk_chk_filter(struct sock_filter *filter, int flen) +int sk_chk_filter(struct sock_filter *filter, unsigned int flen) { /* * Valid instructions are initialized to non-0. @@ -645,7 +645,7 @@ int sk_detach_filter(struct sock *sk) filter = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); + RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); ret = 0; } diff --git a/net/core/flow.c b/net/core/flow.c index 555a456efb07..e318c7e98042 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -358,6 +358,18 @@ void flow_cache_flush(void) put_online_cpus(); } +static void flow_cache_flush_task(struct work_struct *work) +{ + flow_cache_flush(); +} + +static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task); + +void flow_cache_flush_deferred(void) +{ + schedule_work(&flow_cache_flush_work); +} + static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); @@ -413,7 +425,7 @@ static int __init flow_cache_init(struct flow_cache *fc) for_each_online_cpu(i) { if (flow_cache_cpu_prepare(fc, i)) - return -ENOMEM; + goto err; } fc->hotcpu_notifier = (struct notifier_block){ .notifier_call = flow_cache_cpu, @@ -426,6 +438,18 @@ static int __init flow_cache_init(struct flow_cache *fc) add_timer(&fc->rnd_timer); return 0; + +err: + for_each_possible_cpu(i) { + struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); + kfree(fcp->hash_table); + fcp->hash_table = NULL; + } + + free_percpu(fc->percpu); + fc->percpu = NULL; + + return -ENOMEM; } static int __init flow_cache_init_global(void) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c new file mode 100644 index 000000000000..0985b9b14b80 --- /dev/null +++ b/net/core/flow_dissector.c @@ -0,0 +1,143 @@ +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/if_vlan.h> +#include <net/ip.h> +#include <linux/if_tunnel.h> +#include <linux/if_pppox.h> +#include <linux/ppp_defs.h> +#include <net/flow_keys.h> + +/* copy saddr & daddr, possibly using 64bit load/store + * Equivalent to : flow->src = iph->saddr; + * flow->dst = iph->daddr; + */ +static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) +{ + BUILD_BUG_ON(offsetof(typeof(*flow), dst) != + offsetof(typeof(*flow), src) + sizeof(flow->src)); + memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); +} + +bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) +{ + int poff, nhoff = skb_network_offset(skb); + u8 ip_proto; + __be16 proto = skb->protocol; + + memset(flow, 0, sizeof(*flow)); + +again: + switch (proto) { + case __constant_htons(ETH_P_IP): { + const struct iphdr *iph; + struct iphdr _iph; +ip: + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + if (!iph) + return false; + + if (ip_is_fragment(iph)) + ip_proto = 0; + else + ip_proto = iph->protocol; + iph_to_flow_copy_addrs(flow, iph); + nhoff += iph->ihl * 4; + break; + } + case __constant_htons(ETH_P_IPV6): { + const struct ipv6hdr *iph; + struct ipv6hdr _iph; +ipv6: + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + if (!iph) + return false; + + ip_proto = iph->nexthdr; + flow->src = iph->saddr.s6_addr32[3]; + flow->dst = iph->daddr.s6_addr32[3]; + nhoff += sizeof(struct ipv6hdr); + break; + } + case __constant_htons(ETH_P_8021Q): { + const struct vlan_hdr *vlan; + struct vlan_hdr _vlan; + + vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); + if (!vlan) + return false; + + proto = vlan->h_vlan_encapsulated_proto; + nhoff += sizeof(*vlan); + goto again; + } + case __constant_htons(ETH_P_PPP_SES): { + struct { + struct pppoe_hdr hdr; + __be16 proto; + } *hdr, _hdr; + hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + if (!hdr) + return false; + proto = hdr->proto; + nhoff += PPPOE_SES_HLEN; + switch (proto) { + case __constant_htons(PPP_IP): + goto ip; + case __constant_htons(PPP_IPV6): + goto ipv6; + default: + return false; + } + } + default: + return false; + } + + switch (ip_proto) { + case IPPROTO_GRE: { + struct gre_hdr { + __be16 flags; + __be16 proto; + } *hdr, _hdr; + + hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + if (!hdr) + return false; + /* + * Only look inside GRE if version zero and no + * routing + */ + if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) { + proto = hdr->proto; + nhoff += 4; + if (hdr->flags & GRE_CSUM) + nhoff += 4; + if (hdr->flags & GRE_KEY) + nhoff += 4; + if (hdr->flags & GRE_SEQ) + nhoff += 4; + goto again; + } + break; + } + case IPPROTO_IPIP: + goto again; + default: + break; + } + + flow->ip_proto = ip_proto; + poff = proto_ports_offset(ip_proto); + if (poff >= 0) { + __be32 *ports, _ports; + + nhoff += poff; + ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); + if (ports) + flow->ports = *ports; + } + + return true; +} +EXPORT_SYMBOL(skb_flow_dissect); diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h index 283c2b993fb8..81e1ed7c8383 100644 --- a/net/core/kmap_skb.h +++ b/net/core/kmap_skb.h @@ -7,7 +7,7 @@ static inline void *kmap_skb_frag(const skb_frag_t *frag) local_bh_disable(); #endif - return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); + return kmap_atomic(skb_frag_page(frag), KM_SKB_DATA_SOFTIRQ); } static inline void kunmap_skb_frag(void *vaddr) diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 357bd4ee4baa..c3519c6d1b16 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -78,8 +78,13 @@ static void rfc2863_policy(struct net_device *dev) static bool linkwatch_urgent_event(struct net_device *dev) { - return netif_running(dev) && netif_carrier_ok(dev) && - qdisc_tx_changing(dev); + if (!netif_running(dev)) + return false; + + if (dev->ifindex != dev->iflink) + return true; + + return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1334d7e56f02..e287346e0934 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -238,6 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) it to safe state. */ skb_queue_purge(&n->arp_queue); + n->arp_queue_len_bytes = 0; n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) n->nud_state = NUD_NOARP; @@ -272,7 +273,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) } EXPORT_SYMBOL(neigh_ifdown); -static struct neighbour *neigh_alloc(struct neigh_table *tbl) +static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev) { struct neighbour *n = NULL; unsigned long now = jiffies; @@ -287,7 +288,15 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) goto out_entries; } - n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC); + if (tbl->entry_size) + n = kzalloc(tbl->entry_size, GFP_ATOMIC); + else { + int sz = sizeof(*n) + tbl->key_len; + + sz = ALIGN(sz, NEIGH_PRIV_ALIGN); + sz += dev->neigh_priv_len; + n = kzalloc(sz, GFP_ATOMIC); + } if (!n) goto out_entries; @@ -313,11 +322,18 @@ out_entries: goto out; } +static void neigh_get_hash_rnd(u32 *x) +{ + get_random_bytes(x, sizeof(*x)); + *x |= 1; +} + static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) { size_t size = (1 << shift) * sizeof(struct neighbour *); struct neigh_hash_table *ret; struct neighbour __rcu **buckets; + int i; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) @@ -334,8 +350,8 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) } ret->hash_buckets = buckets; ret->hash_shift = shift; - get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); - ret->hash_rnd |= 1; + for (i = 0; i < NEIGH_NUM_HASH_RND; i++) + neigh_get_hash_rnd(&ret->hash_rnd[i]); return ret; } @@ -462,7 +478,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, u32 hash_val; int key_len = tbl->key_len; int error; - struct neighbour *n1, *rc, *n = neigh_alloc(tbl); + struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); struct neigh_hash_table *nht; if (!n) { @@ -480,6 +496,14 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, goto out_neigh_release; } + if (dev->netdev_ops->ndo_neigh_construct) { + error = dev->netdev_ops->ndo_neigh_construct(n); + if (error < 0) { + rc = ERR_PTR(error); + goto out_neigh_release; + } + } + /* Device specific setup. */ if (n->parms->neigh_setup && (error = n->parms->neigh_setup(n)) < 0) { @@ -677,18 +701,14 @@ static inline void neigh_parms_put(struct neigh_parms *parms) neigh_parms_destroy(parms); } -static void neigh_destroy_rcu(struct rcu_head *head) -{ - struct neighbour *neigh = container_of(head, struct neighbour, rcu); - - kmem_cache_free(neigh->tbl->kmem_cachep, neigh); -} /* * neighbour must already be out of the table; * */ void neigh_destroy(struct neighbour *neigh) { + struct net_device *dev = neigh->dev; + NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); if (!neigh->dead) { @@ -702,14 +722,18 @@ void neigh_destroy(struct neighbour *neigh) printk(KERN_WARNING "Impossible event.\n"); skb_queue_purge(&neigh->arp_queue); + neigh->arp_queue_len_bytes = 0; - dev_put(neigh->dev); + if (dev->netdev_ops->ndo_neigh_destroy) + dev->netdev_ops->ndo_neigh_destroy(neigh); + + dev_put(dev); neigh_parms_put(neigh->parms); NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); atomic_dec(&neigh->tbl->entries); - call_rcu(&neigh->rcu, neigh_destroy_rcu); + kfree_rcu(neigh, rcu); } EXPORT_SYMBOL(neigh_destroy); @@ -842,6 +866,20 @@ static void neigh_invalidate(struct neighbour *neigh) write_lock(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); + neigh->arp_queue_len_bytes = 0; +} + +static void neigh_probe(struct neighbour *neigh) + __releases(neigh->lock) +{ + struct sk_buff *skb = skb_peek(&neigh->arp_queue); + /* keep skb alive even if arp_queue overflows */ + if (skb) + skb = skb_copy(skb, GFP_ATOMIC); + write_unlock(&neigh->lock); + neigh->ops->solicit(neigh, skb); + atomic_inc(&neigh->probes); + kfree_skb(skb); } /* Called when a timer expires for a neighbour entry. */ @@ -859,12 +897,8 @@ static void neigh_timer_handler(unsigned long arg) now = jiffies; next = now + HZ; - if (!(state & NUD_IN_TIMER)) { -#ifndef CONFIG_SMP - printk(KERN_WARNING "neigh: timer & !nud_in_timer\n"); -#endif + if (!(state & NUD_IN_TIMER)) goto out; - } if (state & NUD_REACHABLE) { if (time_before_eq(now, @@ -920,14 +954,7 @@ static void neigh_timer_handler(unsigned long arg) neigh_hold(neigh); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { - struct sk_buff *skb = skb_peek(&neigh->arp_queue); - /* keep skb alive even if arp_queue overflows */ - if (skb) - skb = skb_copy(skb, GFP_ATOMIC); - write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); - atomic_inc(&neigh->probes); - kfree_skb(skb); + neigh_probe(neigh); } else { out: write_unlock(&neigh->lock); @@ -942,7 +969,7 @@ out: int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; - unsigned long now; + bool immediate_probe = false; write_lock_bh(&neigh->lock); @@ -950,14 +977,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; - now = jiffies; - if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (neigh->parms->mcast_probes + neigh->parms->app_probes) { + unsigned long next, now = jiffies; + atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; - neigh->updated = jiffies; - neigh_add_timer(neigh, now + 1); + neigh->updated = now; + next = now + max(neigh->parms->retrans_time, HZ/2); + neigh_add_timer(neigh, next); + immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; @@ -976,20 +1005,29 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (neigh->nud_state == NUD_INCOMPLETE) { if (skb) { - if (skb_queue_len(&neigh->arp_queue) >= - neigh->parms->queue_len) { + while (neigh->arp_queue_len_bytes + skb->truesize > + neigh->parms->queue_len_bytes) { struct sk_buff *buff; + buff = __skb_dequeue(&neigh->arp_queue); + if (!buff) + break; + neigh->arp_queue_len_bytes -= buff->truesize; kfree_skb(buff); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } skb_dst_force(skb); __skb_queue_tail(&neigh->arp_queue, skb); + neigh->arp_queue_len_bytes += skb->truesize; } rc = 1; } out_unlock_bh: - write_unlock_bh(&neigh->lock); + if (immediate_probe) + neigh_probe(neigh); + else + write_unlock(&neigh->lock); + local_bh_enable(); return rc; } EXPORT_SYMBOL(__neigh_event_send); @@ -1156,13 +1194,18 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, struct dst_entry *dst = skb_dst(skb); struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); + + rcu_read_lock(); /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (dst && (n2 = dst_get_neighbour(dst)) != NULL) + if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL) n1 = n2; n1->output(n1, skb); + rcu_read_unlock(); + write_lock_bh(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); + neigh->arp_queue_len_bytes = 0; } out: if (update_isrouter) { @@ -1465,11 +1508,6 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); - if (!tbl->kmem_cachep) - tbl->kmem_cachep = - kmem_cache_create(tbl->id, tbl->entry_size, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); @@ -1554,9 +1592,6 @@ int neigh_table_clear(struct neigh_table *tbl) free_percpu(tbl->stats); tbl->stats = NULL; - kmem_cache_destroy(tbl->kmem_cachep); - tbl->kmem_cachep = NULL; - return 0; } EXPORT_SYMBOL(neigh_table_clear); @@ -1735,7 +1770,11 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); - NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); + NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes); + /* approximative value for deprecated QUEUE_LEN (in packets) */ + NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, + DIV_ROUND_UP(parms->queue_len_bytes, + SKB_TRUESIZE(ETH_FRAME_LEN))); NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); @@ -1796,7 +1835,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - ndc.ndtc_hash_rnd = nht->hash_rnd; + ndc.ndtc_hash_rnd = nht->hash_rnd[0]; ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); rcu_read_unlock_bh(); @@ -1962,7 +2001,11 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) switch (i) { case NDTPA_QUEUE_LEN: - p->queue_len = nla_get_u32(tbp[i]); + p->queue_len_bytes = nla_get_u32(tbp[i]) * + SKB_TRUESIZE(ETH_FRAME_LEN); + break; + case NDTPA_QUEUE_LENBYTES: + p->queue_len_bytes = nla_get_u32(tbp[i]); break; case NDTPA_PROXY_QLEN: p->proxy_qlen = nla_get_u32(tbp[i]); @@ -2385,7 +2428,10 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; - pn = pn->next; + do { + pn = pn->next; + } while (pn && !net_eq(pneigh_net(pn), net)); + while (!pn) { if (++state->bucket > PNEIGH_HASHMASK) break; @@ -2623,117 +2669,158 @@ EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL -#define NEIGH_VARS_MAX 19 +static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int size, ret; + ctl_table tmp = *ctl; + + tmp.data = &size; + size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN)); + ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); + if (write && !ret) + *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); + return ret; +} + +enum { + NEIGH_VAR_MCAST_PROBE, + NEIGH_VAR_UCAST_PROBE, + NEIGH_VAR_APP_PROBE, + NEIGH_VAR_RETRANS_TIME, + NEIGH_VAR_BASE_REACHABLE_TIME, + NEIGH_VAR_DELAY_PROBE_TIME, + NEIGH_VAR_GC_STALETIME, + NEIGH_VAR_QUEUE_LEN, + NEIGH_VAR_QUEUE_LEN_BYTES, + NEIGH_VAR_PROXY_QLEN, + NEIGH_VAR_ANYCAST_DELAY, + NEIGH_VAR_PROXY_DELAY, + NEIGH_VAR_LOCKTIME, + NEIGH_VAR_RETRANS_TIME_MS, + NEIGH_VAR_BASE_REACHABLE_TIME_MS, + NEIGH_VAR_GC_INTERVAL, + NEIGH_VAR_GC_THRESH1, + NEIGH_VAR_GC_THRESH2, + NEIGH_VAR_GC_THRESH3, + NEIGH_VAR_MAX +}; static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table neigh_vars[NEIGH_VARS_MAX]; + struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; char *dev_name; } neigh_sysctl_template __read_mostly = { .neigh_vars = { - { + [NEIGH_VAR_MCAST_PROBE] = { .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NEIGH_VAR_UCAST_PROBE] = { .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NEIGH_VAR_APP_PROBE] = { .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NEIGH_VAR_RETRANS_TIME] = { .procname = "retrans_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_userhz_jiffies, }, - { + [NEIGH_VAR_BASE_REACHABLE_TIME] = { .procname = "base_reachable_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { + [NEIGH_VAR_DELAY_PROBE_TIME] = { .procname = "delay_first_probe_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { + [NEIGH_VAR_GC_STALETIME] = { .procname = "gc_stale_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { + [NEIGH_VAR_QUEUE_LEN] = { .procname = "unres_qlen", .maxlen = sizeof(int), .mode = 0644, + .proc_handler = proc_unres_qlen, + }, + [NEIGH_VAR_QUEUE_LEN_BYTES] = { + .procname = "unres_qlen_bytes", + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NEIGH_VAR_PROXY_QLEN] = { .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NEIGH_VAR_ANYCAST_DELAY] = { .procname = "anycast_delay", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_userhz_jiffies, }, - { + [NEIGH_VAR_PROXY_DELAY] = { .procname = "proxy_delay", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_userhz_jiffies, }, - { + [NEIGH_VAR_LOCKTIME] = { .procname = "locktime", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_userhz_jiffies, }, - { + [NEIGH_VAR_RETRANS_TIME_MS] = { .procname = "retrans_time_ms", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, - { + [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = { .procname = "base_reachable_time_ms", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, - { + [NEIGH_VAR_GC_INTERVAL] = { .procname = "gc_interval", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { + [NEIGH_VAR_GC_THRESH1] = { .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NEIGH_VAR_GC_THRESH2] = { .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { + [NEIGH_VAR_GC_THRESH3] = { .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, @@ -2766,47 +2853,49 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, if (!t) goto err; - t->neigh_vars[0].data = &p->mcast_probes; - t->neigh_vars[1].data = &p->ucast_probes; - t->neigh_vars[2].data = &p->app_probes; - t->neigh_vars[3].data = &p->retrans_time; - t->neigh_vars[4].data = &p->base_reachable_time; - t->neigh_vars[5].data = &p->delay_probe_time; - t->neigh_vars[6].data = &p->gc_staletime; - t->neigh_vars[7].data = &p->queue_len; - t->neigh_vars[8].data = &p->proxy_qlen; - t->neigh_vars[9].data = &p->anycast_delay; - t->neigh_vars[10].data = &p->proxy_delay; - t->neigh_vars[11].data = &p->locktime; - t->neigh_vars[12].data = &p->retrans_time; - t->neigh_vars[13].data = &p->base_reachable_time; + t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes; + t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes; + t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes; + t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time; + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time; + t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time; + t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime; + t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes; + t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes; + t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen; + t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay; + t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay; + t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime; + t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time; + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time; if (dev) { dev_name_source = dev->name; /* Terminate the table early */ - memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); + memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, + sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); } else { dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname; - t->neigh_vars[14].data = (int *)(p + 1); - t->neigh_vars[15].data = (int *)(p + 1) + 1; - t->neigh_vars[16].data = (int *)(p + 1) + 2; - t->neigh_vars[17].data = (int *)(p + 1) + 3; + t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1); + t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1; + t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2; + t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; } if (handler) { /* RetransTime */ - t->neigh_vars[3].proc_handler = handler; - t->neigh_vars[3].extra1 = dev; + t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; + t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev; /* ReachableTime */ - t->neigh_vars[4].proc_handler = handler; - t->neigh_vars[4].extra1 = dev; + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev; /* RetransTime (in milliseconds)*/ - t->neigh_vars[12].proc_handler = handler; - t->neigh_vars[12].extra1 = dev; + t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; + t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev; /* ReachableTime (in milliseconds) */ - t->neigh_vars[13].proc_handler = handler; - t->neigh_vars[13].extra1 = dev; + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; } t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1683e5db2f27..f3dbd4f596a4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -20,6 +20,8 @@ #include <linux/rtnetlink.h> #include <linux/wireless.h> #include <linux/vmalloc.h> +#include <linux/export.h> +#include <linux/jiffies.h> #include <net/wext.h> #include "net-sysfs.h" @@ -147,7 +149,7 @@ static ssize_t show_speed(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!dev_ethtool_get_settings(netdev, &cmd)) + if (!__ethtool_get_settings(netdev, &cmd)) ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); @@ -165,7 +167,7 @@ static ssize_t show_duplex(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!dev_ethtool_get_settings(netdev, &cmd)) + if (!__ethtool_get_settings(netdev, &cmd)) ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); } @@ -605,9 +607,12 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, rcu_assign_pointer(queue->rps_map, map); spin_unlock(&rps_map_lock); - if (old_map) + if (map) + jump_label_inc(&rps_needed); + if (old_map) { kfree_rcu(old_map, rcu); - + jump_label_dec(&rps_needed); + } free_cpumask_var(mask); return len; } @@ -617,15 +622,15 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, char *buf) { struct rps_dev_flow_table *flow_table; - unsigned int val = 0; + unsigned long val = 0; rcu_read_lock(); flow_table = rcu_dereference(queue->rps_flow_table); if (flow_table) - val = flow_table->mask + 1; + val = (unsigned long)flow_table->mask + 1; rcu_read_unlock(); - return sprintf(buf, "%u\n", val); + return sprintf(buf, "%lu\n", val); } static void rps_dev_flow_table_release_work(struct work_struct *work) @@ -649,33 +654,46 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, struct rx_queue_attribute *attr, const char *buf, size_t len) { - unsigned int count; - char *endp; + unsigned long mask, count; struct rps_dev_flow_table *table, *old_table; static DEFINE_SPINLOCK(rps_dev_flow_lock); + int rc; if (!capable(CAP_NET_ADMIN)) return -EPERM; - count = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; + rc = kstrtoul(buf, 0, &count); + if (rc < 0) + return rc; if (count) { - int i; - - if (count > 1<<30) { + mask = count - 1; + /* mask = roundup_pow_of_two(count) - 1; + * without overflows... + */ + while ((mask | (mask >> 1)) != mask) + mask |= (mask >> 1); + /* On 64 bit arches, must check mask fits in table->mask (u32), + * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1) + * doesnt overflow. + */ +#if BITS_PER_LONG > 32 + if (mask > (unsigned long)(u32)mask) + return -EINVAL; +#else + if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) + / sizeof(struct rps_dev_flow)) { /* Enforce a limit to prevent overflow */ return -EINVAL; } - count = roundup_pow_of_two(count); - table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); +#endif + table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); if (!table) return -ENOMEM; - table->mask = count - 1; - for (i = 0; i < count; i++) - table->flows[i].cpu = RPS_NO_CPU; + table->mask = mask; + for (count = 0; count <= mask; count++) + table->flows[count].cpu = RPS_NO_CPU; } else table = NULL; @@ -712,13 +730,13 @@ static void rx_queue_release(struct kobject *kobj) struct rps_dev_flow_table *flow_table; - map = rcu_dereference_raw(queue->rps_map); + map = rcu_dereference_protected(queue->rps_map, 1); if (map) { RCU_INIT_POINTER(queue->rps_map, NULL); kfree_rcu(map, rcu); } - flow_table = rcu_dereference_raw(queue->rps_flow_table); + flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); if (flow_table) { RCU_INIT_POINTER(queue->rps_flow_table, NULL); call_rcu(&flow_table->rcu, rps_dev_flow_table_release); @@ -779,7 +797,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) #endif } -#ifdef CONFIG_XPS +#ifdef CONFIG_SYSFS /* * netdev_queue sysfs structures and functions. */ @@ -825,6 +843,133 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = { .store = netdev_queue_attr_store, }; +static ssize_t show_trans_timeout(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + char *buf) +{ + unsigned long trans_timeout; + + spin_lock_irq(&queue->_xmit_lock); + trans_timeout = queue->trans_timeout; + spin_unlock_irq(&queue->_xmit_lock); + + return sprintf(buf, "%lu", trans_timeout); +} + +static struct netdev_queue_attribute queue_trans_timeout = + __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); + +#ifdef CONFIG_BQL +/* + * Byte queue limits sysfs structures and functions. + */ +static ssize_t bql_show(char *buf, unsigned int value) +{ + return sprintf(buf, "%u\n", value); +} + +static ssize_t bql_set(const char *buf, const size_t count, + unsigned int *pvalue) +{ + unsigned int value; + int err; + + if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) + value = DQL_MAX_LIMIT; + else { + err = kstrtouint(buf, 10, &value); + if (err < 0) + return err; + if (value > DQL_MAX_LIMIT) + return -EINVAL; + } + + *pvalue = value; + + return count; +} + +static ssize_t bql_show_hold_time(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, + char *buf) +{ + struct dql *dql = &queue->dql; + + return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); +} + +static ssize_t bql_set_hold_time(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + const char *buf, size_t len) +{ + struct dql *dql = &queue->dql; + unsigned value; + int err; + + err = kstrtouint(buf, 10, &value); + if (err < 0) + return err; + + dql->slack_hold_time = msecs_to_jiffies(value); + + return len; +} + +static struct netdev_queue_attribute bql_hold_time_attribute = + __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time, + bql_set_hold_time); + +static ssize_t bql_show_inflight(struct netdev_queue *queue, + struct netdev_queue_attribute *attr, + char *buf) +{ + struct dql *dql = &queue->dql; + + return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed); +} + +static struct netdev_queue_attribute bql_inflight_attribute = + __ATTR(inflight, S_IRUGO | S_IWUSR, bql_show_inflight, NULL); + +#define BQL_ATTR(NAME, FIELD) \ +static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ + struct netdev_queue_attribute *attr, \ + char *buf) \ +{ \ + return bql_show(buf, queue->dql.FIELD); \ +} \ + \ +static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ + struct netdev_queue_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return bql_set(buf, len, &queue->dql.FIELD); \ +} \ + \ +static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \ + __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \ + bql_set_ ## NAME); + +BQL_ATTR(limit, limit) +BQL_ATTR(limit_max, max_limit) +BQL_ATTR(limit_min, min_limit) + +static struct attribute *dql_attrs[] = { + &bql_limit_attribute.attr, + &bql_limit_max_attribute.attr, + &bql_limit_min_attribute.attr, + &bql_hold_time_attribute.attr, + &bql_inflight_attribute.attr, + NULL +}; + +static struct attribute_group dql_group = { + .name = "byte_queue_limits", + .attrs = dql_attrs, +}; +#endif /* CONFIG_BQL */ + +#ifdef CONFIG_XPS static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; @@ -889,6 +1034,52 @@ static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) +static void xps_queue_release(struct netdev_queue *queue) +{ + struct net_device *dev = queue->dev; + struct xps_dev_maps *dev_maps; + struct xps_map *map; + unsigned long index; + int i, pos, nonempty = 0; + + index = get_netdev_queue_index(queue); + + mutex_lock(&xps_map_mutex); + dev_maps = xmap_dereference(dev->xps_maps); + + if (dev_maps) { + for_each_possible_cpu(i) { + map = xmap_dereference(dev_maps->cpu_map[i]); + if (!map) + continue; + + for (pos = 0; pos < map->len; pos++) + if (map->queues[pos] == index) + break; + + if (pos < map->len) { + if (map->len > 1) + map->queues[pos] = + map->queues[--map->len]; + else { + RCU_INIT_POINTER(dev_maps->cpu_map[i], + NULL); + kfree_rcu(map, rcu); + map = NULL; + } + } + if (map) + nonempty = 1; + } + + if (!nonempty) { + RCU_INIT_POINTER(dev->xps_maps, NULL); + kfree_rcu(dev_maps, rcu); + } + } + mutex_unlock(&xps_map_mutex); +} + static ssize_t store_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, const char *buf, size_t len) @@ -900,7 +1091,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue, struct xps_map *map, *new_map; struct xps_dev_maps *dev_maps, *new_dev_maps; int nonempty = 0; - int numa_node = -2; + int numa_node_id = -2; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -943,10 +1134,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue, need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); #ifdef CONFIG_NUMA if (need_set) { - if (numa_node == -2) - numa_node = cpu_to_node(cpu); - else if (numa_node != cpu_to_node(cpu)) - numa_node = -1; + if (numa_node_id == -2) + numa_node_id = cpu_to_node(cpu); + else if (numa_node_id != cpu_to_node(cpu)) + numa_node_id = -1; } #endif if (need_set && pos >= map_len) { @@ -986,17 +1177,17 @@ static ssize_t store_xps_map(struct netdev_queue *queue, nonempty = 1; } - if (nonempty) + if (nonempty) { rcu_assign_pointer(dev->xps_maps, new_dev_maps); - else { + } else { kfree(new_dev_maps); - rcu_assign_pointer(dev->xps_maps, NULL); + RCU_INIT_POINTER(dev->xps_maps, NULL); } if (dev_maps) kfree_rcu(dev_maps, rcu); - netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : + netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id : NUMA_NO_NODE); mutex_unlock(&xps_map_mutex); @@ -1019,58 +1210,23 @@ error: static struct netdev_queue_attribute xps_cpus_attribute = __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); +#endif /* CONFIG_XPS */ static struct attribute *netdev_queue_default_attrs[] = { + &queue_trans_timeout.attr, +#ifdef CONFIG_XPS &xps_cpus_attribute.attr, +#endif NULL }; static void netdev_queue_release(struct kobject *kobj) { struct netdev_queue *queue = to_netdev_queue(kobj); - struct net_device *dev = queue->dev; - struct xps_dev_maps *dev_maps; - struct xps_map *map; - unsigned long index; - int i, pos, nonempty = 0; - - index = get_netdev_queue_index(queue); - - mutex_lock(&xps_map_mutex); - dev_maps = xmap_dereference(dev->xps_maps); - - if (dev_maps) { - for_each_possible_cpu(i) { - map = xmap_dereference(dev_maps->cpu_map[i]); - if (!map) - continue; - for (pos = 0; pos < map->len; pos++) - if (map->queues[pos] == index) - break; - - if (pos < map->len) { - if (map->len > 1) - map->queues[pos] = - map->queues[--map->len]; - else { - RCU_INIT_POINTER(dev_maps->cpu_map[i], - NULL); - kfree_rcu(map, rcu); - map = NULL; - } - } - if (map) - nonempty = 1; - } - - if (!nonempty) { - RCU_INIT_POINTER(dev->xps_maps, NULL); - kfree_rcu(dev_maps, rcu); - } - } - - mutex_unlock(&xps_map_mutex); +#ifdef CONFIG_XPS + xps_queue_release(queue); +#endif memset(kobj, 0, sizeof(*kobj)); dev_put(queue->dev); @@ -1091,22 +1247,29 @@ static int netdev_queue_add_kobject(struct net_device *net, int index) kobj->kset = net->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, "tx-%u", index); - if (error) { - kobject_put(kobj); - return error; - } + if (error) + goto exit; + +#ifdef CONFIG_BQL + error = sysfs_create_group(kobj, &dql_group); + if (error) + goto exit; +#endif kobject_uevent(kobj, KOBJ_ADD); dev_hold(queue->dev); + return 0; +exit: + kobject_put(kobj); return error; } -#endif /* CONFIG_XPS */ +#endif /* CONFIG_SYSFS */ int netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) { -#ifdef CONFIG_XPS +#ifdef CONFIG_SYSFS int i; int error = 0; @@ -1118,20 +1281,26 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num) } } - while (--i >= new_num) - kobject_put(&net->_tx[i].kobj); + while (--i >= new_num) { + struct netdev_queue *queue = net->_tx + i; + +#ifdef CONFIG_BQL + sysfs_remove_group(&queue->kobj, &dql_group); +#endif + kobject_put(&queue->kobj); + } return error; #else return 0; -#endif +#endif /* CONFIG_SYSFS */ } static int register_queue_kobjects(struct net_device *net) { int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0; -#if defined(CONFIG_RPS) || defined(CONFIG_XPS) +#ifdef CONFIG_SYSFS net->queues_kset = kset_create_and_add("queues", NULL, &net->dev.kobj); if (!net->queues_kset) @@ -1172,7 +1341,7 @@ static void remove_queue_kobjects(struct net_device *net) net_rx_queue_update_kobjects(net, real_rx, 0); netdev_queue_update_kobjects(net, real_tx, 0); -#if defined(CONFIG_RPS) || defined(CONFIG_XPS) +#ifdef CONFIG_SYSFS kset_unregister(net->queues_kset); #endif } diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 52380b1d552a..ba3c0120786c 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -11,6 +11,7 @@ #include <linux/inetdevice.h> #include <linux/inet.h> #include <linux/interrupt.h> +#include <linux/export.h> #include <linux/netpoll.h> #include <linux/sched.h> #include <linux/delay.h> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 5bbdbf0d3664..aefcd7acbffa 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -10,6 +10,7 @@ #include <linux/nsproxy.h> #include <linux/proc_fs.h> #include <linux/file.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/netns/generic.h> diff --git a/net/core/netevent.c b/net/core/netevent.c index 865f0ceb81fb..f17ccd291d39 100644 --- a/net/core/netevent.c +++ b/net/core/netevent.c @@ -15,6 +15,7 @@ #include <linux/rtnetlink.h> #include <linux/notifier.h> +#include <linux/export.h> #include <net/netevent.h> static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 52622517e0d8..556b08298669 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -23,6 +23,7 @@ #include <linux/rcupdate.h> #include <linux/workqueue.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/tcp.h> #include <net/udp.h> #include <asm/unaligned.h> @@ -75,7 +76,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); __netif_tx_lock(txq, smp_processor_id()); - if (netif_tx_queue_frozen_or_stopped(txq) || + if (netif_xmit_frozen_or_stopped(txq) || ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); @@ -316,7 +317,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (__netif_tx_trylock(txq)) { - if (!netif_tx_queue_stopped(txq)) { + if (!netif_xmit_stopped(txq)) { status = ops->ndo_start_xmit(skb, dev); if (status == NETDEV_TX_OK) txq_trans_update(txq); @@ -421,6 +422,7 @@ static void arp_reply(struct sk_buff *skb) struct sk_buff *send_skb; struct netpoll *np, *tmp; unsigned long flags; + int hlen, tlen; int hits = 0; if (list_empty(&npinfo->rx_np)) @@ -478,8 +480,9 @@ static void arp_reply(struct sk_buff *skb) if (tip != np->local_ip) continue; - send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), - LL_RESERVED_SPACE(np->dev)); + hlen = LL_RESERVED_SPACE(np->dev); + tlen = np->dev->needed_tailroom; + send_skb = find_skb(np, size + hlen + tlen, hlen); if (!send_skb) continue; @@ -903,7 +906,7 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - rcu_assign_pointer(np->dev->npinfo, NULL); + RCU_INIT_POINTER(np->dev->npinfo, NULL); /* avoid racing with NAPI reading npinfo */ synchronize_rcu_bh(); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c new file mode 100644 index 000000000000..3a9fd4826b75 --- /dev/null +++ b/net/core/netprio_cgroup.c @@ -0,0 +1,344 @@ +/* + * net/core/netprio_cgroup.c Priority Control Group + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Neil Horman <nhorman@tuxdriver.com> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/cgroup.h> +#include <linux/rcupdate.h> +#include <linux/atomic.h> +#include <net/rtnetlink.h> +#include <net/pkt_cls.h> +#include <net/sock.h> +#include <net/netprio_cgroup.h> + +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp); +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); + +struct cgroup_subsys net_prio_subsys = { + .name = "net_prio", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, +#ifdef CONFIG_NETPRIO_CGROUP + .subsys_id = net_prio_subsys_id, +#endif + .module = THIS_MODULE +}; + +#define PRIOIDX_SZ 128 + +static unsigned long prioidx_map[PRIOIDX_SZ]; +static DEFINE_SPINLOCK(prioidx_map_lock); +static atomic_t max_prioidx = ATOMIC_INIT(0); + +static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) +{ + return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id), + struct cgroup_netprio_state, css); +} + +static int get_prioidx(u32 *prio) +{ + unsigned long flags; + u32 prioidx; + + spin_lock_irqsave(&prioidx_map_lock, flags); + prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ); + set_bit(prioidx, prioidx_map); + spin_unlock_irqrestore(&prioidx_map_lock, flags); + if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) + return -ENOSPC; + + atomic_set(&max_prioidx, prioidx); + *prio = prioidx; + return 0; +} + +static void put_prioidx(u32 idx) +{ + unsigned long flags; + + spin_lock_irqsave(&prioidx_map_lock, flags); + clear_bit(idx, prioidx_map); + spin_unlock_irqrestore(&prioidx_map_lock, flags); +} + +static void extend_netdev_table(struct net_device *dev, u32 new_len) +{ + size_t new_size = sizeof(struct netprio_map) + + ((sizeof(u32) * new_len)); + struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); + struct netprio_map *old_priomap; + int i; + + old_priomap = rtnl_dereference(dev->priomap); + + if (!new_priomap) { + printk(KERN_WARNING "Unable to alloc new priomap!\n"); + return; + } + + for (i = 0; + old_priomap && (i < old_priomap->priomap_len); + i++) + new_priomap->priomap[i] = old_priomap->priomap[i]; + + new_priomap->priomap_len = new_len; + + rcu_assign_pointer(dev->priomap, new_priomap); + if (old_priomap) + kfree_rcu(old_priomap, rcu); +} + +static void update_netdev_tables(void) +{ + struct net_device *dev; + u32 max_len = atomic_read(&max_prioidx); + struct netprio_map *map; + + rtnl_lock(); + for_each_netdev(&init_net, dev) { + map = rtnl_dereference(dev->priomap); + if ((!map) || + (map->priomap_len < max_len)) + extend_netdev_table(dev, max_len); + } + rtnl_unlock(); +} + +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp) +{ + struct cgroup_netprio_state *cs; + int ret; + + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) + return ERR_PTR(-ENOMEM); + + if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) { + kfree(cs); + return ERR_PTR(-EINVAL); + } + + ret = get_prioidx(&cs->prioidx); + if (ret != 0) { + printk(KERN_WARNING "No space in priority index array\n"); + kfree(cs); + return ERR_PTR(ret); + } + + return &cs->css; +} + +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + struct cgroup_netprio_state *cs; + struct net_device *dev; + struct netprio_map *map; + + cs = cgrp_netprio_state(cgrp); + rtnl_lock(); + for_each_netdev(&init_net, dev) { + map = rtnl_dereference(dev->priomap); + if (map) + map->priomap[cs->prioidx] = 0; + } + rtnl_unlock(); + put_prioidx(cs->prioidx); + kfree(cs); +} + +static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) +{ + return (u64)cgrp_netprio_state(cgrp)->prioidx; +} + +static int read_priomap(struct cgroup *cont, struct cftype *cft, + struct cgroup_map_cb *cb) +{ + struct net_device *dev; + u32 prioidx = cgrp_netprio_state(cont)->prioidx; + u32 priority; + struct netprio_map *map; + + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + map = rcu_dereference(dev->priomap); + priority = map ? map->priomap[prioidx] : 0; + cb->fill(cb, dev->name, priority); + } + rcu_read_unlock(); + return 0; +} + +static int write_priomap(struct cgroup *cgrp, struct cftype *cft, + const char *buffer) +{ + char *devname = kstrdup(buffer, GFP_KERNEL); + int ret = -EINVAL; + u32 prioidx = cgrp_netprio_state(cgrp)->prioidx; + unsigned long priority; + char *priostr; + struct net_device *dev; + struct netprio_map *map; + + if (!devname) + return -ENOMEM; + + /* + * Minimally sized valid priomap string + */ + if (strlen(devname) < 3) + goto out_free_devname; + + priostr = strstr(devname, " "); + if (!priostr) + goto out_free_devname; + + /* + *Separate the devname from the associated priority + *and advance the priostr poitner to the priority value + */ + *priostr = '\0'; + priostr++; + + /* + * If the priostr points to NULL, we're at the end of the passed + * in string, and its not a valid write + */ + if (*priostr == '\0') + goto out_free_devname; + + ret = kstrtoul(priostr, 10, &priority); + if (ret < 0) + goto out_free_devname; + + ret = -ENODEV; + + dev = dev_get_by_name(&init_net, devname); + if (!dev) + goto out_free_devname; + + update_netdev_tables(); + ret = 0; + rcu_read_lock(); + map = rcu_dereference(dev->priomap); + if (map) + map->priomap[prioidx] = priority; + rcu_read_unlock(); + dev_put(dev); + +out_free_devname: + kfree(devname); + return ret; +} + +static struct cftype ss_files[] = { + { + .name = "prioidx", + .read_u64 = read_prioidx, + }, + { + .name = "ifpriomap", + .read_map = read_priomap, + .write_string = write_priomap, + }, +}; + +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); +} + +static int netprio_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct netprio_map *old; + u32 max_len = atomic_read(&max_prioidx); + + /* + * Note this is called with rtnl_lock held so we have update side + * protection on our rcu assignments + */ + + switch (event) { + + case NETDEV_REGISTER: + if (max_len) + extend_netdev_table(dev, max_len); + break; + case NETDEV_UNREGISTER: + old = rtnl_dereference(dev->priomap); + RCU_INIT_POINTER(dev->priomap, NULL); + if (old) + kfree_rcu(old, rcu); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block netprio_device_notifier = { + .notifier_call = netprio_device_event +}; + +static int __init init_cgroup_netprio(void) +{ + int ret; + + ret = cgroup_load_subsys(&net_prio_subsys); + if (ret) + goto out; +#ifndef CONFIG_NETPRIO_CGROUP + smp_wmb(); + net_prio_subsys_id = net_prio_subsys.subsys_id; +#endif + + register_netdevice_notifier(&netprio_device_notifier); + +out: + return ret; +} + +static void __exit exit_cgroup_netprio(void) +{ + struct netprio_map *old; + struct net_device *dev; + + unregister_netdevice_notifier(&netprio_device_notifier); + + cgroup_unload_subsys(&net_prio_subsys); + +#ifndef CONFIG_NETPRIO_CGROUP + net_prio_subsys_id = -1; + synchronize_rcu(); +#endif + + rtnl_lock(); + for_each_netdev(&init_net, dev) { + old = rtnl_dereference(dev->priomap); + RCU_INIT_POINTER(dev->priomap, NULL); + if (old) + kfree_rcu(old, rcu); + } + rtnl_unlock(); +} + +module_init(init_cgroup_netprio); +module_exit(exit_cgroup_netprio); +MODULE_LICENSE("GPL v2"); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e35a6fbb8110..65f80c7b1656 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1304,7 +1304,7 @@ static ssize_t pktgen_if_write(struct file *file, scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr); - ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); + pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr; if (debug) printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf); @@ -1327,8 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file, scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr); - ipv6_addr_copy(&pkt_dev->cur_in6_daddr, - &pkt_dev->min_in6_daddr); + pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr; if (debug) printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf); @@ -1371,7 +1370,7 @@ static ssize_t pktgen_if_write(struct file *file, scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr); - ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); + pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr; if (debug) printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf); @@ -2025,13 +2024,13 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, pkt_dev->odevname); - pkt_dev->queue_map_min = ntxq - 1; + pkt_dev->queue_map_min = (ntxq ?: 1) - 1; } if (pkt_dev->queue_map_max >= ntxq) { pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, pkt_dev->odevname); - pkt_dev->queue_map_max = ntxq - 1; + pkt_dev->queue_map_max = (ntxq ?: 1) - 1; } /* Default to the interface's mac if not explicitly set. */ @@ -2079,9 +2078,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) ifp = ifp->if_next) { if (ifp->scope == IFA_LINK && !(ifp->flags & IFA_F_TENTATIVE)) { - ipv6_addr_copy(&pkt_dev-> - cur_in6_saddr, - &ifp->addr); + pkt_dev->cur_in6_saddr = ifp->addr; err = 0; break; } @@ -2145,9 +2142,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) } start_time = ktime_now(); - if (remaining < 100000) - ndelay(remaining); /* really small just spin */ - else { + if (remaining < 100000) { + /* for small delays (<100us), just loop until limit is reached */ + do { + end_time = ktime_now(); + } while (ktime_lt(end_time, spin_until)); + } else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); do { @@ -2162,8 +2162,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_cancel(&t.timer); } while (t.task && pkt_dev->running && !signal_pending(current)); __set_current_state(TASK_RUNNING); + end_time = ktime_now(); } - end_time = ktime_now(); pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); @@ -2602,18 +2602,18 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, if (!pkt_dev->page) break; } - skb_shinfo(skb)->frags[i].page = pkt_dev->page; get_page(pkt_dev->page); + skb_frag_set_page(skb, i, pkt_dev->page); skb_shinfo(skb)->frags[i].page_offset = 0; /*last fragment, fill rest of data*/ if (i == (frags - 1)) - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); + skb_frag_size_set(&skb_shinfo(skb)->frags[i], + (datalen < PAGE_SIZE ? datalen : PAGE_SIZE)); else - skb_shinfo(skb)->frags[i].size = frag_len; - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; + skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len); + datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]); + skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]); + skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]); i++; skb_shinfo(skb)->nr_frags = i; } @@ -2955,8 +2955,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, iph->payload_len = htons(sizeof(struct udphdr) + datalen); iph->nexthdr = IPPROTO_UDP; - ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr); - ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); + iph->daddr = pkt_dev->cur_in6_daddr; + iph->saddr = pkt_dev->cur_in6_saddr; skb->mac_header = (skb->network_header - ETH_HLEN - pkt_dev->pkt_overhead); @@ -3342,7 +3342,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) __netif_tx_lock_bh(txq); - if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) { + if (unlikely(netif_xmit_frozen_or_stopped(txq))) { ret = NETDEV_TX_BUSY; pkt_dev->last_ok = 0; goto unlock; diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 182236b2510a..9b570a6a33c5 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -26,10 +26,11 @@ * but then some measure against one socket starving all other sockets * would be needed. * - * It was 128 by default. Experiments with real servers show, that + * The minimum value of it is 128. Experiments with real servers show that * it is absolutely not enough even at 100conn/sec. 256 cures most - * of problems. This value is adjusted to 128 for very small machines - * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb). + * of problems. + * This value is adjusted to 128 for low memory machines, + * and it will increase in proportion to the memory of machine. * Note : Dont forget somaxconn that may limit backlog too. */ int sysctl_max_syn_backlog = 256; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d3a628196716..f16444bc6cbb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -273,6 +273,17 @@ EXPORT_SYMBOL_GPL(rtnl_unregister_all); static LIST_HEAD(link_ops); +static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +{ + const struct rtnl_link_ops *ops; + + list_for_each_entry(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) + return ops; + } + return NULL; +} + /** * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. * @ops: struct rtnl_link_ops * to register @@ -285,6 +296,9 @@ static LIST_HEAD(link_ops); */ int __rtnl_link_register(struct rtnl_link_ops *ops) { + if (rtnl_link_ops_get(ops->kind)) + return -EEXIST; + if (!ops->dellink) ops->dellink = unregister_netdevice_queue; @@ -351,17 +365,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops) } EXPORT_SYMBOL_GPL(rtnl_link_unregister); -static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) -{ - const struct rtnl_link_ops *ops; - - list_for_each_entry(ops, &link_ops, list) { - if (!strcmp(ops->kind, kind)) - return ops; - } - return NULL; -} - static size_t rtnl_link_get_size(const struct net_device *dev) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; @@ -731,7 +734,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev) size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + - nla_total_size(sizeof(struct ifla_vf_tx_rate))); + nla_total_size(sizeof(struct ifla_vf_tx_rate)) + + nla_total_size(sizeof(struct ifla_vf_spoofchk))); return size; } else return 0; @@ -954,13 +958,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_mac vf_mac; struct ifla_vf_vlan vf_vlan; struct ifla_vf_tx_rate vf_tx_rate; + struct ifla_vf_spoofchk vf_spoofchk; + + /* + * Not all SR-IOV capable drivers support the + * spoofcheck query. Preset to -1 so the user + * space tool can detect that the driver didn't + * report anything. + */ + ivi.spoofchk = -1; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; - vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; + vf_mac.vf = + vf_vlan.vf = + vf_tx_rate.vf = + vf_spoofchk.vf = ivi.vf; + memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_tx_rate.rate = ivi.tx_rate; + vf_spoofchk.setting = ivi.spoofchk; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -968,7 +986,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); - NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); + NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), + &vf_tx_rate); + NLA_PUT(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), + &vf_spoofchk); nla_nest_end(skb, vf); } nla_nest_end(skb, vfinfo); @@ -1202,6 +1223,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivt->rate); break; } + case IFLA_VF_SPOOFCHK: { + struct ifla_vf_spoofchk *ivs; + ivs = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + break; + } default: err = -EINVAL; break; @@ -1604,7 +1634,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, dev_net_set(dev, net); dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; - dev->real_num_tx_queues = real_num_queues; if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); diff --git a/net/core/scm.c b/net/core/scm.c index 811b53fb330e..ff52ad0a5150 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) if (err) goto error; - if (pid_vnr(p->pid) != p->creds.pid) { + if (!p->pid || pid_vnr(p->pid) != p->creds.pid) { struct pid *pid; err = -ESRCH; pid = find_get_pid(p->creds.pid); @@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) p->pid = pid; } - if ((p->cred->euid != p->creds.uid) || - (p->cred->egid != p->creds.gid)) { + if (!p->cred || + (p->cred->euid != p->creds.uid) || + (p->cred->egid != p->creds.gid)) { struct cred *cred; err = -ENOMEM; cred = prepare_creds(); @@ -193,7 +194,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) cred->uid = cred->euid = p->creds.uid; cred->gid = cred->egid = p->creds.gid; - put_cred(p->cred); + if (p->cred) + put_cred(p->cred); p->cred = cred; } break; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 45329d7c9dd9..6fd44606fdd1 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -19,6 +19,7 @@ static int __init net_secret_init(void) } late_initcall(net_secret_init); +#ifdef CONFIG_INET static u32 seq_scale(u32 seq) { /* @@ -33,9 +34,10 @@ static u32 seq_scale(u32 seq) */ return seq + (ktime_to_ns(ktime_get_real()) >> 6); } +#endif -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, +#if IS_ENABLED(CONFIG_IPV6) +__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport) { u32 secret[MD5_MESSAGE_BYTES / 4]; @@ -132,7 +134,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +#if IS_ENABLED(CONFIG_IP_DCCP) u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { @@ -154,7 +156,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, } EXPORT_SYMBOL(secure_dccp_sequence_number); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, __be16 sport, __be16 dport) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 387703f56fce..da0c97f2fab4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -184,11 +184,21 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, goto out; prefetchw(skb); + /* We do our best to align skb_shared_info on a separate cache + * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives + * aligned memory blocks, unless SLUB/SLAB debug is enabled. + * Both skb->head and skb_shared_info are cache line aligned. + */ size = SKB_DATA_ALIGN(size); - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask, node); + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + data = kmalloc_node_track_caller(size, gfp_mask, node); if (!data) goto nodata; + /* kmalloc(size) might give us more room than requested. + * Put skb_shared_info exactly at the end of allocated zone, + * to allow max possible filling before reallocation. + */ + size = SKB_WITH_OVERHEAD(ksize(data)); prefetchw(data + size); /* @@ -197,7 +207,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->truesize = size + sizeof(struct sk_buff); + /* Account for allocated memory : skb + skb->head */ + skb->truesize = SKB_TRUESIZE(size); atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -234,6 +245,55 @@ nodata: EXPORT_SYMBOL(__alloc_skb); /** + * build_skb - build a network buffer + * @data: data buffer provided by caller + * + * Allocate a new &sk_buff. Caller provides space holding head and + * skb_shared_info. @data must have been allocated by kmalloc() + * The return is the new skb buffer. + * On a failure the return is %NULL, and @data is not freed. + * Notes : + * Before IO, driver allocates only data buffer where NIC put incoming frame + * Driver should add room at head (NET_SKB_PAD) and + * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)) + * After IO, driver calls build_skb(), to allocate sk_buff and populate it + * before giving packet to stack. + * RX rings only contains data buffers, not full skbs. + */ +struct sk_buff *build_skb(void *data) +{ + struct skb_shared_info *shinfo; + struct sk_buff *skb; + unsigned int size; + + skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); + if (!skb) + return NULL; + + size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->truesize = SKB_TRUESIZE(size); + atomic_set(&skb->users, 1); + skb->head = data; + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->mac_header = ~0U; +#endif + + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); + atomic_set(&shinfo->dataref, 1); + kmemcheck_annotate_variable(shinfo->destructor_arg); + + return skb; +} +EXPORT_SYMBOL(build_skb); + +/** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on * @length: length to allocate @@ -326,7 +386,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb_shinfo(skb)->nr_frags) { int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); } /* @@ -392,7 +452,7 @@ static void skb_release_head_state(struct sk_buff *skb) WARN_ON(in_irq()); skb->destructor(skb); } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb->nfct); #endif #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED @@ -475,6 +535,30 @@ void consume_skb(struct sk_buff *skb) EXPORT_SYMBOL(consume_skb); /** + * skb_recycle - clean up an skb for reuse + * @skb: buffer + * + * Recycles the skb to be reused as a receive buffer. This + * function does any necessary reference count dropping, and + * cleans up the skbuff as if it just came from __alloc_skb(). + */ +void skb_recycle(struct sk_buff *skb) +{ + struct skb_shared_info *shinfo; + + skb_release_head_state(skb); + + shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); + atomic_set(&shinfo->dataref, 1); + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->data = skb->head + NET_SKB_PAD; + skb_reset_tail_pointer(skb); +} +EXPORT_SYMBOL(skb_recycle); + +/** * skb_recycle_check - check if skb can be reused for receive * @skb: buffer * @skb_size: minimum receive buffer size @@ -488,33 +572,10 @@ EXPORT_SYMBOL(consume_skb); */ bool skb_recycle_check(struct sk_buff *skb, int skb_size) { - struct skb_shared_info *shinfo; - - if (irqs_disabled()) + if (!skb_is_recycleable(skb, skb_size)) return false; - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) - return false; - - if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) - return false; - - skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); - if (skb_end_pointer(skb) - skb->head < skb_size) - return false; - - if (skb_shared(skb) || skb_cloned(skb)) - return false; - - skb_release_head_state(skb); - - shinfo = skb_shinfo(skb); - memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); - atomic_set(&shinfo->dataref, 1); - - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->data = skb->head + NET_SKB_PAD; - skb_reset_tail_pointer(skb); + skb_recycle(skb); return true; } @@ -529,6 +590,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; + new->ooo_okay = old->ooo_okay; + new->l4_rxhash = old->l4_rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -539,15 +602,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); new->priority = old->priority; -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) +#if IS_ENABLED(CONFIG_IP_VS) new->ipvs_property = old->ipvs_property; #endif new->protocol = old->protocol; new->mark = old->mark; new->skb_iif = old->skb_iif; __nf_copy(new, old); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) new->nf_trace = old->nf_trace; #endif #ifdef CONFIG_NET_SCHED @@ -647,7 +709,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) } vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); memcpy(page_address(page), - vaddr + f->page_offset, f->size); + vaddr + f->page_offset, skb_frag_size(f)); kunmap_skb_frag(vaddr); page->private = (unsigned long)head; head = page; @@ -655,14 +717,14 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) /* skb frags release userspace buffers */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); uarg->callback(uarg); /* skb frags point to kernel buffers */ for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) { - skb_shinfo(skb)->frags[i - 1].page_offset = 0; - skb_shinfo(skb)->frags[i - 1].page = head; + __skb_fill_page_desc(skb, i-1, head, 0, + skb_shinfo(skb)->frags[i - 1].size); head = (struct page *)head->private; } @@ -777,8 +839,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) EXPORT_SYMBOL(skb_copy); /** - * pskb_copy - create copy of an sk_buff with private head. + * __pskb_copy - create copy of an sk_buff with private head. * @skb: buffer to copy + * @headroom: headroom of new skb * @gfp_mask: allocation priority * * Make a copy of both an &sk_buff and part of its data, located @@ -789,16 +852,16 @@ EXPORT_SYMBOL(skb_copy); * The returned buffer has a reference count of 1. */ -struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) +struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) { - unsigned int size = skb_end_pointer(skb) - skb->head; + unsigned int size = skb_headlen(skb) + headroom; struct sk_buff *n = alloc_skb(size, gfp_mask); if (!n) goto out; /* Set the data pointer */ - skb_reserve(n, skb_headroom(skb)); + skb_reserve(n, headroom); /* Set the tail pointer and length */ skb_put(n, skb_headlen(skb)); /* Copy the bytes */ @@ -820,7 +883,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); + skb_frag_ref(skb, i); } skb_shinfo(n)->nr_frags = i; } @@ -834,7 +897,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) out: return n; } -EXPORT_SYMBOL(pskb_copy); +EXPORT_SYMBOL(__pskb_copy); /** * pskb_expand_head - reallocate header of &sk_buff @@ -911,7 +974,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, goto nofrags; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); + skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); @@ -1178,20 +1241,20 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) goto drop_pages; for (; i < nfrags; i++) { - int end = offset + skb_shinfo(skb)->frags[i].size; + int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]); if (end < len) { offset = end; continue; } - skb_shinfo(skb)->frags[i++].size = len - offset; + skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset); drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); @@ -1294,9 +1357,11 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Estimate size of pulled pages. */ eat = delta; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size >= eat) + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (size >= eat) goto pull_pages; - eat -= skb_shinfo(skb)->frags[i].size; + eat -= size; } /* If we need update frag list, we are in troubles. @@ -1359,14 +1424,16 @@ pull_pages: eat = delta; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); - eat -= skb_shinfo(skb)->frags[i].size; + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (size <= eat) { + skb_frag_unref(skb, i); + eat -= size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; + skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); eat = 0; } k++; @@ -1421,7 +1488,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { u8 *vaddr; @@ -1619,7 +1686,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - if (__splice_segment(f->page, f->page_offset, f->size, + if (__splice_segment(skb_frag_page(f), + f->page_offset, skb_frag_size(f), offset, len, skb, spd, 0, sk, pipe)) return 1; } @@ -1729,7 +1797,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) WARN_ON(start > offset + len); - end = start + frag->size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u8 *vaddr; @@ -1802,7 +1870,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { __wsum csum2; u8 *vaddr; @@ -1877,7 +1945,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { __wsum csum2; u8 *vaddr; @@ -2150,7 +2218,7 @@ static inline void skb_split_no_header(struct sk_buff *skb, skb->data_len = len - pos; for (i = 0; i < nfrags; i++) { - int size = skb_shinfo(skb)->frags[i].size; + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + size > len) { skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; @@ -2164,10 +2232,10 @@ static inline void skb_split_no_header(struct sk_buff *skb, * where splitting is expensive. * 2. Split is accurately. We make this. */ - get_page(skb_shinfo(skb)->frags[i].page); + skb_frag_ref(skb, i); skb_shinfo(skb1)->frags[0].page_offset += len - pos; - skb_shinfo(skb1)->frags[0].size -= len - pos; - skb_shinfo(skb)->frags[i].size = len - pos; + skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); + skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; } k++; @@ -2211,7 +2279,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb) * @shiftlen: shift up to this many bytes * * Attempts to shift up to shiftlen worth of bytes, which may be less than - * the length of the skb, from tgt to skb. Returns number bytes shifted. + * the length of the skb, from skb to tgt. Returns number bytes shifted. * It's up to caller to free skb if everything was shifted. * * If @tgt runs out of frags, the whole operation is aborted. @@ -2239,12 +2307,13 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) * commit all, so that we don't have to undo partial changes */ if (!to || - !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { + !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), + fragfrom->page_offset)) { merge = -1; } else { merge = to - 1; - todo -= fragfrom->size; + todo -= skb_frag_size(fragfrom); if (todo < 0) { if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) @@ -2254,8 +2323,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[merge]; - fragto->size += shiftlen; - fragfrom->size -= shiftlen; + skb_frag_size_add(fragto, shiftlen); + skb_frag_size_sub(fragfrom, shiftlen); fragfrom->page_offset += shiftlen; goto onlymerged; @@ -2279,20 +2348,20 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[to]; - if (todo >= fragfrom->size) { + if (todo >= skb_frag_size(fragfrom)) { *fragto = *fragfrom; - todo -= fragfrom->size; + todo -= skb_frag_size(fragfrom); from++; to++; } else { - get_page(fragfrom->page); + __skb_frag_ref(fragfrom); fragto->page = fragfrom->page; fragto->page_offset = fragfrom->page_offset; - fragto->size = todo; + skb_frag_size_set(fragto, todo); fragfrom->page_offset += todo; - fragfrom->size -= todo; + skb_frag_size_sub(fragfrom, todo); todo = 0; to++; @@ -2307,8 +2376,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragfrom = &skb_shinfo(skb)->frags[0]; fragto = &skb_shinfo(tgt)->frags[merge]; - fragto->size += fragfrom->size; - put_page(fragfrom->page); + skb_frag_size_add(fragto, skb_frag_size(fragfrom)); + __skb_frag_unref(fragfrom); } /* Reposition in the original skb */ @@ -2405,7 +2474,7 @@ next_skb: while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; - block_limit = frag->size + st->stepped_offset; + block_limit = skb_frag_size(frag) + st->stepped_offset; if (abs_offset < block_limit) { if (!st->frag_data) @@ -2423,7 +2492,7 @@ next_skb: } st->frag_idx++; - st->stepped_offset += frag->size; + st->stepped_offset += skb_frag_size(frag); } if (st->frag_data) { @@ -2553,14 +2622,13 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, left = PAGE_SIZE - frag->page_offset; copy = (length > left)? left : length; - ret = getfrag(from, (page_address(frag->page) + - frag->page_offset + frag->size), + ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag), offset, copy, 0, skb); if (ret < 0) return -EFAULT; /* copy was successful so update the size parameters */ - frag->size += copy; + skb_frag_size_add(frag, copy); skb->len += copy; skb->data_len += copy; offset += copy; @@ -2602,7 +2670,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) +struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; @@ -2706,12 +2774,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; - get_page(frag->page); - size = frag->size; + __skb_frag_ref(frag); + size = skb_frag_size(frag); if (pos < offset) { frag->page_offset += offset - pos; - frag->size -= offset - pos; + skb_frag_size_sub(frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; @@ -2720,7 +2788,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) i++; pos += size; } else { - frag->size -= pos + size - (offset + len); + skb_frag_size_sub(frag, pos + size - (offset + len)); goto skip_fraglist; } @@ -2800,7 +2868,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) } while (--i); frag->page_offset += offset; - frag->size -= offset; + skb_frag_size_sub(frag, offset); skb->truesize -= skb->data_len; skb->len -= skb->data_len; @@ -2852,7 +2920,7 @@ merge: unsigned int eat = offset - headlen; skbinfo->frags[0].page_offset += eat; - skbinfo->frags[0].size -= eat; + skb_frag_size_sub(&skbinfo->frags[0], eat); skb->data_len -= eat; skb->len -= eat; offset = headlen; @@ -2923,13 +2991,13 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (copy > len) copy = len; - sg_set_page(&sg[elt], frag->page, copy, + sg_set_page(&sg[elt], skb_frag_page(frag), copy, frag->page_offset+offset-start); elt++; if (!(len -= copy)) @@ -3150,6 +3218,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, } EXPORT_SYMBOL_GPL(skb_tstamp_tx); +void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) +{ + struct sock *sk = skb->sk; + struct sock_exterr_skb *serr; + int err; + + skb->wifi_acked_valid = 1; + skb->wifi_acked = acked; + + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; + + err = sock_queue_err_skb(sk, skb); + if (err) + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); + /** * skb_partial_csum_set - set up and verify partial csum values for packet diff --git a/net/core/sock.c b/net/core/sock.c index bc745d00ea4d..5c5af9988f94 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -111,6 +111,8 @@ #include <linux/init.h> #include <linux/highmem.h> #include <linux/user_namespace.h> +#include <linux/jump_label.h> +#include <linux/memcontrol.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -125,6 +127,7 @@ #include <net/xfrm.h> #include <linux/ipsec.h> #include <net/cls_cgroup.h> +#include <net/netprio_cgroup.h> #include <linux/filter.h> @@ -134,6 +137,46 @@ #include <net/tcp.h> #endif +static DEFINE_MUTEX(proto_list_mutex); +static LIST_HEAD(proto_list); + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) +{ + struct proto *proto; + int ret = 0; + + mutex_lock(&proto_list_mutex); + list_for_each_entry(proto, &proto_list, node) { + if (proto->init_cgroup) { + ret = proto->init_cgroup(cgrp, ss); + if (ret) + goto out; + } + } + + mutex_unlock(&proto_list_mutex); + return ret; +out: + list_for_each_entry_continue_reverse(proto, &proto_list, node) + if (proto->destroy_cgroup) + proto->destroy_cgroup(cgrp, ss); + mutex_unlock(&proto_list_mutex); + return ret; +} + +void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) +{ + struct proto *proto; + + mutex_lock(&proto_list_mutex); + list_for_each_entry_reverse(proto, &proto_list, node) + if (proto->destroy_cgroup) + proto->destroy_cgroup(cgrp, ss); + mutex_unlock(&proto_list_mutex); +} +#endif + /* * Each address family might have different locking rules, so we have * one slock key per address family: @@ -141,6 +184,9 @@ static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; +struct jump_label_key memcg_socket_limit_enabled; +EXPORT_SYMBOL(memcg_socket_limit_enabled); + /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket @@ -207,7 +253,7 @@ static struct lock_class_key af_callback_keys[AF_MAX]; * not depend upon such differences. */ #define _SK_MEM_PACKETS 256 -#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) @@ -221,10 +267,16 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); -#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP) +#if defined(CONFIG_CGROUPS) +#if !defined(CONFIG_NET_CLS_CGROUP) int net_cls_subsys_id = -1; EXPORT_SYMBOL_GPL(net_cls_subsys_id); #endif +#if !defined(CONFIG_NETPRIO_CGROUP) +int net_prio_subsys_id = -1; +EXPORT_SYMBOL_GPL(net_prio_subsys_id); +#endif +#endif static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { @@ -269,14 +321,14 @@ static void sock_warn_obsolete_bsdism(const char *name) } } -static void sock_disable_timestamp(struct sock *sk, int flag) +#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) + +static void sock_disable_timestamp(struct sock *sk, unsigned long flags) { - if (sock_flag(sk, flag)) { - sock_reset_flag(sk, flag); - if (!sock_flag(sk, SOCK_TIMESTAMP) && - !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) { + if (sk->sk_flags & flags) { + sk->sk_flags &= ~flags; + if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP)) net_disable_timestamp(); - } } } @@ -288,11 +340,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; - /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces - number of warnings when compiling with -W --ANK - */ - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned)sk->sk_rcvbuf) { + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { atomic_inc(&sk->sk_drops); trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; @@ -387,7 +435,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); - rcu_assign_pointer(sk->sk_dst_cache, NULL); + RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; } @@ -682,7 +730,7 @@ set_rcvbuf: SOCK_TIMESTAMPING_RX_SOFTWARE); else sock_disable_timestamp(sk, - SOCK_TIMESTAMPING_RX_SOFTWARE); + (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, val & SOF_TIMESTAMPING_SOFTWARE); sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, @@ -738,11 +786,13 @@ set_rcvbuf: /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ case SO_RXQ_OVFL: - if (valbool) - sock_set_flag(sk, SOCK_RXQ_OVFL); - else - sock_reset_flag(sk, SOCK_RXQ_OVFL); + sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); break; + + case SO_WIFI_STATUS: + sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -964,6 +1014,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); break; + case SO_WIFI_STATUS: + v.val = !!sock_flag(sk, SOCK_WIFI_STATUS); + break; + default: return -ENOPROTOOPT; } @@ -1114,6 +1168,18 @@ void sock_update_classid(struct sock *sk) sk->sk_classid = classid; } EXPORT_SYMBOL(sock_update_classid); + +void sock_update_netprioidx(struct sock *sk) +{ + struct cgroup_netprio_state *state; + if (in_interrupt()) + return; + rcu_read_lock(); + state = task_netprio_state(current); + sk->sk_cgrp_prioidx = state ? state->prioidx : 0; + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif /** @@ -1141,6 +1207,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); + sock_update_netprioidx(sk); } return sk; @@ -1158,11 +1225,10 @@ static void __sk_free(struct sock *sk) atomic_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); - rcu_assign_pointer(sk->sk_filter, NULL); + RCU_INIT_POINTER(sk->sk_filter, NULL); } - sock_disable_timestamp(sk, SOCK_TIMESTAMP); - sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); + sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); if (atomic_read(&sk->sk_omem_alloc)) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", @@ -1207,7 +1273,20 @@ void sk_release_kernel(struct sock *sk) } EXPORT_SYMBOL(sk_release_kernel); -struct sock *sk_clone(const struct sock *sk, const gfp_t priority) +static void sk_update_clone(const struct sock *sk, struct sock *newsk) +{ + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + sock_update_memcg(newsk); +} + +/** + * sk_clone_lock - clone a socket, and lock its clone + * @sk: the socket to clone + * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) + * + * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) + */ +struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) { struct sock *newsk; @@ -1260,6 +1339,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); sk_free(newsk); newsk = NULL; goto out; @@ -1289,17 +1369,18 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sk_set_socket(newsk, NULL); newsk->sk_wq = NULL; + sk_update_clone(sk, newsk); + if (newsk->sk_prot->sockets_allocated) - percpu_counter_inc(newsk->sk_prot->sockets_allocated); + sk_sockets_allocated_inc(newsk); - if (sock_flag(newsk, SOCK_TIMESTAMP) || - sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE)) + if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); } out: return newsk; } -EXPORT_SYMBOL_GPL(sk_clone); +EXPORT_SYMBOL_GPL(sk_clone_lock); void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { @@ -1533,7 +1614,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { struct page *page; - skb_frag_t *frag; page = alloc_pages(sk->sk_allocation, 0); if (!page) { @@ -1543,12 +1623,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, goto failure; } - frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; - frag->page_offset = 0; - frag->size = (data_len >= PAGE_SIZE ? - PAGE_SIZE : - data_len); + __skb_fill_page_desc(skb, i, + page, 0, + (data_len >= PAGE_SIZE ? + PAGE_SIZE : + data_len)); data_len -= PAGE_SIZE; } @@ -1681,30 +1760,34 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) struct proto *prot = sk->sk_prot; int amt = sk_mem_pages(size); long allocated; + int parent_status = UNDER_LIMIT; sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; - allocated = atomic_long_add_return(amt, prot->memory_allocated); + + allocated = sk_memory_allocated_add(sk, amt, &parent_status); /* Under limit. */ - if (allocated <= prot->sysctl_mem[0]) { - if (prot->memory_pressure && *prot->memory_pressure) - *prot->memory_pressure = 0; + if (parent_status == UNDER_LIMIT && + allocated <= sk_prot_mem_limits(sk, 0)) { + sk_leave_memory_pressure(sk); return 1; } - /* Under pressure. */ - if (allocated > prot->sysctl_mem[1]) - if (prot->enter_memory_pressure) - prot->enter_memory_pressure(sk); + /* Under pressure. (we or our parents) */ + if ((parent_status > SOFT_LIMIT) || + allocated > sk_prot_mem_limits(sk, 1)) + sk_enter_memory_pressure(sk); - /* Over hard limit. */ - if (allocated > prot->sysctl_mem[2]) + /* Over hard limit (we or our parents) */ + if ((parent_status == OVER_LIMIT) || + (allocated > sk_prot_mem_limits(sk, 2))) goto suppress_allocation; /* guarantee minimum buffer size under pressure */ if (kind == SK_MEM_RECV) { if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) return 1; + } else { /* SK_MEM_SEND */ if (sk->sk_type == SOCK_STREAM) { if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) @@ -1714,13 +1797,13 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) return 1; } - if (prot->memory_pressure) { + if (sk_has_memory_pressure(sk)) { int alloc; - if (!*prot->memory_pressure) + if (!sk_under_memory_pressure(sk)) return 1; - alloc = percpu_counter_read_positive(prot->sockets_allocated); - if (prot->sysctl_mem[2] > alloc * + alloc = sk_sockets_allocated_read_positive(sk); + if (sk_prot_mem_limits(sk, 2) > alloc * sk_mem_pages(sk->sk_wmem_queued + atomic_read(&sk->sk_rmem_alloc) + sk->sk_forward_alloc)) @@ -1743,7 +1826,9 @@ suppress_allocation: /* Alas. Undo changes. */ sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; - atomic_long_sub(amt, prot->memory_allocated); + + sk_memory_allocated_sub(sk, amt, parent_status); + return 0; } EXPORT_SYMBOL(__sk_mem_schedule); @@ -1754,15 +1839,13 @@ EXPORT_SYMBOL(__sk_mem_schedule); */ void __sk_mem_reclaim(struct sock *sk) { - struct proto *prot = sk->sk_prot; - - atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, - prot->memory_allocated); + sk_memory_allocated_sub(sk, + sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0); sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; - if (prot->memory_pressure && *prot->memory_pressure && - (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0])) - *prot->memory_pressure = 0; + if (sk_under_memory_pressure(sk) && + (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) + sk_leave_memory_pressure(sk); } EXPORT_SYMBOL(__sk_mem_reclaim); @@ -2133,16 +2216,15 @@ EXPORT_SYMBOL(sock_get_timestampns); void sock_enable_timestamp(struct sock *sk, int flag) { if (!sock_flag(sk, flag)) { + unsigned long previous_flags = sk->sk_flags; + sock_set_flag(sk, flag); /* * we just set one of the two flags which require net * time stamping, but time stamping might have been on * already because of the other one */ - if (!sock_flag(sk, - flag == SOCK_TIMESTAMP ? - SOCK_TIMESTAMPING_RX_SOFTWARE : - SOCK_TIMESTAMP)) + if (!(previous_flags & SK_FLAGS_TIMESTAMP)) net_enable_timestamp(); } } @@ -2254,9 +2336,6 @@ void sk_common_release(struct sock *sk) } EXPORT_SYMBOL(sk_common_release); -static DEFINE_RWLOCK(proto_list_lock); -static LIST_HEAD(proto_list); - #ifdef CONFIG_PROC_FS #define PROTO_INUSE_NR 64 /* should be enough for the first time */ struct prot_inuse { @@ -2405,10 +2484,10 @@ int proto_register(struct proto *prot, int alloc_slab) } } - write_lock(&proto_list_lock); + mutex_lock(&proto_list_mutex); list_add(&prot->node, &proto_list); assign_proto_idx(prot); - write_unlock(&proto_list_lock); + mutex_unlock(&proto_list_mutex); return 0; out_free_timewait_sock_slab_name: @@ -2431,10 +2510,10 @@ EXPORT_SYMBOL(proto_register); void proto_unregister(struct proto *prot) { - write_lock(&proto_list_lock); + mutex_lock(&proto_list_mutex); release_proto_idx(prot); list_del(&prot->node); - write_unlock(&proto_list_lock); + mutex_unlock(&proto_list_mutex); if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); @@ -2457,9 +2536,9 @@ EXPORT_SYMBOL(proto_unregister); #ifdef CONFIG_PROC_FS static void *proto_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(proto_list_lock) + __acquires(proto_list_mutex) { - read_lock(&proto_list_lock); + mutex_lock(&proto_list_mutex); return seq_list_start_head(&proto_list, *pos); } @@ -2469,25 +2548,36 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void proto_seq_stop(struct seq_file *seq, void *v) - __releases(proto_list_lock) + __releases(proto_list_mutex) { - read_unlock(&proto_list_lock); + mutex_unlock(&proto_list_mutex); } static char proto_method_implemented(const void *method) { return method == NULL ? 'n' : 'y'; } +static long sock_prot_memory_allocated(struct proto *proto) +{ + return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L; +} + +static char *sock_prot_memory_pressure(struct proto *proto) +{ + return proto->memory_pressure != NULL ? + proto_memory_pressure(proto) ? "yes" : "no" : "NI"; +} static void proto_seq_printf(struct seq_file *seq, struct proto *proto) { + seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", proto->name, proto->obj_size, sock_prot_inuse_get(seq_file_net(seq), proto), - proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L, - proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", + sock_prot_memory_allocated(proto), + sock_prot_memory_pressure(proto), proto->max_header, proto->slab == NULL ? "no" : "yes", module_name(proto->owner), diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c new file mode 100644 index 000000000000..b9868e1fd62c --- /dev/null +++ b/net/core/sock_diag.c @@ -0,0 +1,192 @@ +#include <linux/mutex.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <net/netlink.h> +#include <net/net_namespace.h> +#include <linux/module.h> +#include <linux/rtnetlink.h> +#include <net/sock.h> + +#include <linux/inet_diag.h> +#include <linux/sock_diag.h> + +static struct sock_diag_handler *sock_diag_handlers[AF_MAX]; +static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); +static DEFINE_MUTEX(sock_diag_table_mutex); + +int sock_diag_check_cookie(void *sk, __u32 *cookie) +{ + if ((cookie[0] != INET_DIAG_NOCOOKIE || + cookie[1] != INET_DIAG_NOCOOKIE) && + ((u32)(unsigned long)sk != cookie[0] || + (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) + return -ESTALE; + else + return 0; +} +EXPORT_SYMBOL_GPL(sock_diag_check_cookie); + +void sock_diag_save_cookie(void *sk, __u32 *cookie) +{ + cookie[0] = (u32)(unsigned long)sk; + cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); +} +EXPORT_SYMBOL_GPL(sock_diag_save_cookie); + +int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) +{ + __u32 *mem; + + mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32))); + + mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); + mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); + mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); + + return 0; + +rtattr_failure: + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); + +void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) +{ + mutex_lock(&sock_diag_table_mutex); + inet_rcv_compat = fn; + mutex_unlock(&sock_diag_table_mutex); +} +EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat); + +void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) +{ + mutex_lock(&sock_diag_table_mutex); + inet_rcv_compat = NULL; + mutex_unlock(&sock_diag_table_mutex); +} +EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat); + +int sock_diag_register(struct sock_diag_handler *hndl) +{ + int err = 0; + + if (hndl->family >= AF_MAX) + return -EINVAL; + + mutex_lock(&sock_diag_table_mutex); + if (sock_diag_handlers[hndl->family]) + err = -EBUSY; + else + sock_diag_handlers[hndl->family] = hndl; + mutex_unlock(&sock_diag_table_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(sock_diag_register); + +void sock_diag_unregister(struct sock_diag_handler *hnld) +{ + int family = hnld->family; + + if (family >= AF_MAX) + return; + + mutex_lock(&sock_diag_table_mutex); + BUG_ON(sock_diag_handlers[family] != hnld); + sock_diag_handlers[family] = NULL; + mutex_unlock(&sock_diag_table_mutex); +} +EXPORT_SYMBOL_GPL(sock_diag_unregister); + +static inline struct sock_diag_handler *sock_diag_lock_handler(int family) +{ + if (sock_diag_handlers[family] == NULL) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, family); + + mutex_lock(&sock_diag_table_mutex); + return sock_diag_handlers[family]; +} + +static inline void sock_diag_unlock_handler(struct sock_diag_handler *h) +{ + mutex_unlock(&sock_diag_table_mutex); +} + +static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + int err; + struct sock_diag_req *req = NLMSG_DATA(nlh); + struct sock_diag_handler *hndl; + + if (nlmsg_len(nlh) < sizeof(*req)) + return -EINVAL; + + hndl = sock_diag_lock_handler(req->sdiag_family); + if (hndl == NULL) + err = -ENOENT; + else + err = hndl->dump(skb, nlh); + sock_diag_unlock_handler(hndl); + + return err; +} + +static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + int ret; + + switch (nlh->nlmsg_type) { + case TCPDIAG_GETSOCK: + case DCCPDIAG_GETSOCK: + if (inet_rcv_compat == NULL) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET); + + mutex_lock(&sock_diag_table_mutex); + if (inet_rcv_compat != NULL) + ret = inet_rcv_compat(skb, nlh); + else + ret = -EOPNOTSUPP; + mutex_unlock(&sock_diag_table_mutex); + + return ret; + case SOCK_DIAG_BY_FAMILY: + return __sock_diag_rcv_msg(skb, nlh); + default: + return -EINVAL; + } +} + +static DEFINE_MUTEX(sock_diag_mutex); + +static void sock_diag_rcv(struct sk_buff *skb) +{ + mutex_lock(&sock_diag_mutex); + netlink_rcv_skb(skb, &sock_diag_rcv_msg); + mutex_unlock(&sock_diag_mutex); +} + +struct sock *sock_diag_nlsk; +EXPORT_SYMBOL_GPL(sock_diag_nlsk); + +static int __init sock_diag_init(void) +{ + sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0, + sock_diag_rcv, NULL, THIS_MODULE); + return sock_diag_nlsk == NULL ? -ENOMEM : 0; +} + +static void __exit sock_diag_exit(void) +{ + netlink_kernel_release(sock_diag_nlsk); +} + +module_init(sock_diag_init); +module_exit(sock_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 77a65f031488..d05559d4d9cd 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -68,8 +68,13 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, if (sock_table != orig_sock_table) { rcu_assign_pointer(rps_sock_flow_table, sock_table); - synchronize_rcu(); - vfree(orig_sock_table); + if (sock_table) + jump_label_inc(&rps_needed); + if (orig_sock_table) { + jump_label_dec(&rps_needed); + synchronize_rcu(); + vfree(orig_sock_table); + } } } diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 98a52640e7cd..661b5a40ec10 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -21,6 +21,7 @@ #include <linux/phy.h> #include <linux/ptp_classify.h> #include <linux/skbuff.h> +#include <linux/export.h> static struct sock_filter ptp_filter[] = { PTP_FILTER @@ -57,9 +58,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) case PTP_CLASS_V2_VLAN: phydev = skb->dev->phydev; if (likely(phydev->drv->txtstamp)) { + if (!atomic_inc_not_zero(&sk->sk_refcnt)) + return; clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) + if (!clone) { + sock_put(sk); return; + } clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } @@ -77,8 +82,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, struct sock_exterr_skb *serr; int err; - if (!hwtstamps) + if (!hwtstamps) { + sock_put(sk); + kfree_skb(skb); return; + } *skb_hwtstamps(skb) = *hwtstamps; serr = SKB_EXT_ERR(skb); @@ -87,6 +95,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; skb->sk = NULL; err = sock_queue_err_skb(sk, skb); + sock_put(sk); if (err) kfree_skb(skb); } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 25d717ebc92e..1b5fefdb8198 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -27,6 +27,7 @@ #include <linux/dmaengine.h> #include <linux/socket.h> +#include <linux/export.h> #include <net/tcp.h> #include <net/netdma.h> @@ -71,14 +72,14 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); copy = end - offset; if (copy > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c index 665a8802105a..1d9eb7c60a68 100644 --- a/net/dcb/dcbevent.c +++ b/net/dcb/dcbevent.c @@ -19,6 +19,7 @@ #include <linux/rtnetlink.h> #include <linux/notifier.h> +#include <linux/export.h> static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 3cb56af4e13c..d86053002c16 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -25,6 +25,7 @@ #include <linux/dcbnl.h> #include <net/dcbevent.h> #include <linux/rtnetlink.h> +#include <linux/module.h> #include <net/sock.h> /** @@ -1255,7 +1256,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) spin_lock(&dcb_lock); list_for_each_entry(itr, &dcb_app_list, list) { - if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) { + if (itr->ifindex == netdev->ifindex) { err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app), &itr->app); if (err) { @@ -1412,7 +1413,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) goto dcb_unlock; list_for_each_entry(itr, &dcb_app_list, list) { - if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) { + if (itr->ifindex == netdev->ifindex) { struct nlattr *app_nest = nla_nest_start(skb, DCB_ATTR_APP); if (!app_nest) @@ -2050,7 +2051,7 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) list_for_each_entry(itr, &dcb_app_list, list) { if (itr->app.selector == app->selector && itr->app.protocol == app->protocol && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { prio = itr->app.priority; break; } @@ -2073,15 +2074,17 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) struct dcb_app_type *itr; struct dcb_app_type event; - memcpy(&event.name, dev->name, sizeof(event.name)); + event.ifindex = dev->ifindex; memcpy(&event.app, new, sizeof(event.app)); + if (dev->dcbnl_ops->getdcbx) + event.dcbx = dev->dcbnl_ops->getdcbx(dev); spin_lock(&dcb_lock); /* Search for existing match and replace */ list_for_each_entry(itr, &dcb_app_list, list) { if (itr->app.selector == new->selector && itr->app.protocol == new->protocol && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { if (new->priority) itr->app.priority = new->priority; else { @@ -2101,7 +2104,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) } memcpy(&entry->app, new, sizeof(*new)); - strncpy(entry->name, dev->name, IFNAMSIZ); + entry->ifindex = dev->ifindex; list_add(&entry->list, &dcb_app_list); } out: @@ -2127,7 +2130,7 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) list_for_each_entry(itr, &dcb_app_list, list) { if (itr->app.selector == app->selector && itr->app.protocol == app->protocol && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { prio |= 1 << itr->app.priority; } } @@ -2150,8 +2153,10 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) struct dcb_app_type event; int err = 0; - memcpy(&event.name, dev->name, sizeof(event.name)); + event.ifindex = dev->ifindex; memcpy(&event.app, new, sizeof(event.app)); + if (dev->dcbnl_ops->getdcbx) + event.dcbx = dev->dcbnl_ops->getdcbx(dev); spin_lock(&dcb_lock); /* Search for existing match and abort if found */ @@ -2159,7 +2164,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) if (itr->app.selector == new->selector && itr->app.protocol == new->protocol && itr->app.priority == new->priority && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { err = -EEXIST; goto out; } @@ -2173,7 +2178,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) } memcpy(&entry->app, new, sizeof(*new)); - strncpy(entry->name, dev->name, IFNAMSIZ); + entry->ifindex = dev->ifindex; list_add(&entry->list, &dcb_app_list); out: spin_unlock(&dcb_lock); @@ -2194,8 +2199,10 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) struct dcb_app_type event; int err = -ENOENT; - memcpy(&event.name, dev->name, sizeof(event.name)); + event.ifindex = dev->ifindex; memcpy(&event.app, del, sizeof(event.app)); + if (dev->dcbnl_ops->getdcbx) + event.dcbx = dev->dcbnl_ops->getdcbx(dev); spin_lock(&dcb_lock); /* Search for existing match and remove it. */ @@ -2203,7 +2210,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) if (itr->app.selector == del->selector && itr->app.protocol == del->protocol && itr->app.priority == del->priority && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { list_del(&itr->list); kfree(itr); err = 0; diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 25b7a8d1ad58..ba07824af4c0 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -12,6 +12,7 @@ #include "dccp.h" #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/export.h> static struct kmem_cache *dccp_ackvec_slab; static struct kmem_cache *dccp_ackvec_record_slab; diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 0462040fc818..f053198e730c 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -29,7 +29,7 @@ #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -static int ccid2_debug; +static bool ccid2_debug; #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) #else #define ccid2_pr_debug(format, a...) @@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) { - struct dccp_sock *dp = dccp_sk(sk); u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2); /* @@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > DCCPF_ACK_RATIO_MAX) - val = DCCPF_ACK_RATIO_MAX; + dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO, + min_t(u32, val, DCCPF_ACK_RATIO_MAX)); +} - if (val == dp->dccps_l_ack_ratio) - return; +static void ccid2_check_l_ack_ratio(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - ccid2_pr_debug("changing local ack ratio to %u\n", val); - dp->dccps_l_ack_ratio = val; + /* + * After a loss, idle period, application limited period, or RTO we + * need to check that the ack ratio is still less than the congestion + * window. Otherwise, we will send an entire congestion window of + * packets and got no response because we haven't sent ack ratio + * packets yet. + * If the ack ratio does need to be reduced, we reduce it to half of + * the congestion window (or 1 if that's zero) instead of to the + * congestion window. This prevents problems if one ack is lost. + */ + if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd) + ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U); +} + +static void ccid2_change_l_seq_window(struct sock *sk, u64 val) +{ + dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW, + clamp_val(val, DCCPF_SEQ_WMIN, + DCCPF_SEQ_WMAX)); } static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -156,7 +174,7 @@ out: /* * Congestion window validation (RFC 2861). */ -static int ccid2_do_cwv = 1; +static bool ccid2_do_cwv = true; module_param(ccid2_do_cwv, bool, 0644); MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation"); @@ -187,6 +205,8 @@ static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now) } hc->tx_cwnd_used = 0; hc->tx_cwnd_stamp = now; + + ccid2_check_l_ack_ratio(sk); } /* This borrows the code of tcp_cwnd_restart() */ @@ -205,6 +225,8 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now) hc->tx_cwnd_stamp = now; hc->tx_cwnd_used = 0; + + ccid2_check_l_ack_ratio(sk); } static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) @@ -405,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, unsigned int *maxincr) { struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - - if (hc->tx_cwnd < hc->tx_ssthresh) { - if (*maxincr > 0 && ++hc->tx_packets_acked == 2) { + struct dccp_sock *dp = dccp_sk(sk); + int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio; + + if (hc->tx_cwnd < dp->dccps_l_seq_win && + r_seq_used < dp->dccps_r_seq_win) { + if (hc->tx_cwnd < hc->tx_ssthresh) { + if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) { + hc->tx_cwnd += 1; + *maxincr -= 1; + hc->tx_packets_acked = 0; + } + } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { hc->tx_cwnd += 1; - *maxincr -= 1; hc->tx_packets_acked = 0; } - } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { - hc->tx_cwnd += 1; - hc->tx_packets_acked = 0; } + + /* + * Adjust the local sequence window and the ack ratio to allow about + * 5 times the number of packets in the network (RFC 4340 7.5.2) + */ + if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2); + else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U); + + if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2); + else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2); + /* * FIXME: RTT is sampled several times per acknowledgment (for each * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). @@ -441,9 +483,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; hc->tx_ssthresh = max(hc->tx_cwnd, 2U); - /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ - if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd) - ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); + ccid2_check_l_ack_ratio(sk); } static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, @@ -494,8 +534,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hc->tx_rpdupack >= NUMDUPACK) { hc->tx_rpdupack = -1; /* XXX lame */ hc->tx_rpseq = 0; - +#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__ + /* + * FIXME: Ack Congestion Control is broken; in + * the current state instabilities occurred with + * Ack Ratios greater than 1; causing hang-ups + * and long RTO timeouts. This needs to be fixed + * before opening up dynamic changes. -- gerrit + */ ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); +#endif } } } diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index f585d330e1e5..18c97543e522 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -43,6 +43,12 @@ struct ccid2_seq { #define CCID2_SEQBUF_LEN 1024 #define CCID2_SEQBUF_MAX 128 +/* + * Multiple of congestion window to keep the sequence window at + * (RFC 4340 7.5.2) + */ +#define CCID2_WIN_CHANGE_FACTOR 5 + /** * struct ccid2_hc_tx_sock - CCID2 TX half connection * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 3d604e1349c0..560627307200 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -38,7 +38,7 @@ #include <asm/unaligned.h> #ifdef CONFIG_IP_DCCP_CCID3_DEBUG -static int ccid3_debug; +static bool ccid3_debug; #define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) #else #define ccid3_pr_debug(format, a...) diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 4902029854d8..62b5828acde0 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -4,10 +4,11 @@ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com> */ +#include <linux/moduleparam.h> #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -int tfrc_debug; +bool tfrc_debug; module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index f8ee3f549770..ed698c42a5fb 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -21,7 +21,7 @@ #include "packet_history.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -extern int tfrc_debug; +extern bool tfrc_debug; #define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) #else #define tfrc_pr_debug(format, a...) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5fdb07229017..29d6bb629a6c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -39,7 +39,7 @@ "%s: " fmt, __func__, ##a) #ifdef CONFIG_IP_DCCP_DEBUG -extern int dccp_debug; +extern bool dccp_debug; #define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) #define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a) @@ -357,7 +357,7 @@ static inline int dccp_bad_service_code(const struct sock *sk, struct dccp_skb_cb { union { struct inet_skb_parm h4; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct inet6_skb_parm h6; #endif } header; @@ -474,6 +474,7 @@ static inline int dccp_ack_pending(const struct sock *sk) return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); } +extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val); extern int dccp_feat_finalise_settings(struct dccp_sock *dp); extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, diff --git a/net/dccp/diag.c b/net/dccp/diag.c index b21f261da75e..028fc43aacbd 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -48,11 +48,23 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, dccp_get_info(sk, _info); } +static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc); +} + +static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct inet_diag_req_v2 *req) +{ + return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req); +} + static const struct inet_diag_handler dccp_diag_handler = { - .idiag_hashinfo = &dccp_hashinfo, + .dump = dccp_diag_dump, + .dump_one = dccp_diag_dump_one, .idiag_get_info = dccp_diag_get_info, - .idiag_type = DCCPDIAG_GETSOCK, - .idiag_info_size = sizeof(struct tcp_info), + .idiag_type = IPPROTO_DCCP, }; static int __init dccp_diag_init(void) @@ -71,4 +83,4 @@ module_exit(dccp_diag_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCP inet_diag handler"); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, DCCPDIAG_GETSOCK); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-33 /* AF_INET - IPPROTO_DCCP */); diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 568def952722..78a2ad70e1b0 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -12,6 +12,7 @@ * ----------- * o Feature negotiation is coordinated with connection setup (as in TCP), wild * changes of parameters of an established connection are not supported. + * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN. * o All currently known SP features have 1-byte quantities. If in the future * extensions of RFCs 4340..42 define features with item lengths larger than * one byte, a feature-specific extension of the code will be required. @@ -343,6 +344,20 @@ static int __dccp_feat_activate(struct sock *sk, const int idx, return dccp_feat_table[idx].activation_hdlr(sk, val, rx); } +/** + * dccp_feat_activate - Activate feature value on socket + * @sk: fully connected DCCP socket (after handshake is complete) + * @feat_num: feature to activate, one of %dccp_feature_numbers + * @local: whether local (1) or remote (0) @feat_num is meant + * @fval: the value (SP or NN) to activate, or NULL to use the default value + * For general use this function is preferable over __dccp_feat_activate(). + */ +static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local, + dccp_feat_val const *fval) +{ + return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval); +} + /* Test for "Req'd" feature (RFC 4340, 6.4) */ static inline int dccp_feat_must_be_understood(u8 feat_num) { @@ -475,8 +490,8 @@ static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local, new->feat_num = feat; new->is_local = local; new->state = FEAT_INITIALISING; - new->needs_confirm = 0; - new->empty_confirm = 0; + new->needs_confirm = false; + new->empty_confirm = false; new->val = *fval; new->needs_mandatory = mandatory; @@ -502,12 +517,12 @@ static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local, new->feat_num = feat; new->is_local = local; new->state = FEAT_STABLE; /* transition in 6.6.2 */ - new->needs_confirm = 1; + new->needs_confirm = true; new->empty_confirm = (fval == NULL); new->val.nn = 0; /* zeroes the whole structure */ if (!new->empty_confirm) new->val = *fval; - new->needs_mandatory = 0; + new->needs_mandatory = false; return 0; } @@ -650,11 +665,22 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, return -1; if (pos->needs_mandatory && dccp_insert_option_mandatory(skb)) return -1; - /* - * Enter CHANGING after transmitting the Change option (6.6.2). - */ - if (pos->state == FEAT_INITIALISING) - pos->state = FEAT_CHANGING; + + if (skb->sk->sk_state == DCCP_OPEN && + (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) { + /* + * Confirms don't get retransmitted (6.6.3) once the + * connection is in state OPEN + */ + dccp_feat_list_pop(pos); + } else { + /* + * Enter CHANGING after transmitting the Change + * option (6.6.2). + */ + if (pos->state == FEAT_INITIALISING) + pos->state = FEAT_CHANGING; + } } return 0; } @@ -730,6 +756,70 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, 0, list, len); } +/** + * dccp_feat_nn_get - Query current/pending value of NN feature + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * For a known NN feature, returns value currently being negotiated, or + * current (confirmed) value if no negotiation is going on. + */ +u64 dccp_feat_nn_get(struct sock *sk, u8 feat) +{ + if (dccp_feat_type(feat) == FEAT_NN) { + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_feat_entry *entry; + + entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1); + if (entry != NULL) + return entry->val.nn; + + switch (feat) { + case DCCPF_ACK_RATIO: + return dp->dccps_l_ack_ratio; + case DCCPF_SEQUENCE_WINDOW: + return dp->dccps_l_seq_win; + } + } + DCCP_BUG("attempt to look up unsupported feature %u", feat); + return 0; +} +EXPORT_SYMBOL_GPL(dccp_feat_nn_get); + +/** + * dccp_feat_signal_nn_change - Update NN values for an established connection + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * @nn_val: the new value to use + * This function is used to communicate NN updates out-of-band. + */ +int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + dccp_feat_val fval = { .nn = nn_val }; + struct dccp_feat_entry *entry; + + if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN) + return 0; + + if (dccp_feat_type(feat) != FEAT_NN || + !dccp_feat_is_valid_nn_val(feat, nn_val)) + return -EINVAL; + + if (nn_val == dccp_feat_nn_get(sk, feat)) + return 0; /* already set or negotiation under way */ + + entry = dccp_feat_list_lookup(fn, feat, 1); + if (entry != NULL) { + dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n", + (unsigned long long)entry->val.nn, + (unsigned long long)nn_val); + dccp_feat_list_pop(entry); + } + + inet_csk_schedule_ack(sk); + return dccp_feat_push_change(fn, feat, 1, 0, &fval); +} +EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change); /* * Tracking features whose value depend on the choice of CCID @@ -1065,7 +1155,7 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, } if (dccp_feat_reconcile(&entry->val, val, len, server, true)) { - entry->empty_confirm = 0; + entry->empty_confirm = false; } else if (is_mandatory) { return DCCP_RESET_CODE_MANDATORY_ERROR; } else if (entry->state == FEAT_INITIALISING) { @@ -1081,10 +1171,10 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, defval = dccp_feat_default_value(feat); if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true)) return DCCP_RESET_CODE_OPTION_ERROR; - entry->empty_confirm = 1; + entry->empty_confirm = true; } - entry->needs_confirm = 1; - entry->needs_mandatory = 0; + entry->needs_confirm = true; + entry->needs_mandatory = false; entry->state = FEAT_STABLE; return 0; @@ -1187,6 +1277,100 @@ confirmation_failed: } /** + * dccp_feat_handle_nn_established - Fast-path reception of NN options + * @sk: socket of an established DCCP connection + * @mandatory: whether @opt was preceded by a Mandatory option + * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only) + * @feat: NN number, one of %dccp_feature_numbers + * @val: NN value + * @len: length of @val in bytes + * This function combines the functionality of change_recv/confirm_recv, with + * the following differences (reset codes are the same): + * - cleanup after receiving the Confirm; + * - values are directly activated after successful parsing; + * - deliberately restricted to NN features. + * The restriction to NN features is essential since SP features can have non- + * predictable outcomes (depending on the remote configuration), and are inter- + * dependent (CCIDs for instance cause further dependencies). + */ +static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt, + u8 feat, u8 *val, u8 len) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + const bool local = (opt == DCCPO_CONFIRM_R); + struct dccp_feat_entry *entry; + u8 type = dccp_feat_type(feat); + dccp_feat_val fval; + + dccp_feat_print_opt(opt, feat, val, len, mandatory); + + /* Ignore non-mandatory unknown and non-NN features */ + if (type == FEAT_UNKNOWN) { + if (local && !mandatory) + return 0; + goto fast_path_unknown; + } else if (type != FEAT_NN) { + return 0; + } + + /* + * We don't accept empty Confirms, since in fast-path feature + * negotiation the values are enabled immediately after sending + * the Change option. + * Empty Changes on the other hand are invalid (RFC 4340, 6.1). + */ + if (len == 0 || len > sizeof(fval.nn)) + goto fast_path_unknown; + + if (opt == DCCPO_CHANGE_L) { + fval.nn = dccp_decode_value_var(val, len); + if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) + goto fast_path_unknown; + + if (dccp_feat_push_confirm(fn, feat, local, &fval) || + dccp_feat_activate(sk, feat, local, &fval)) + return DCCP_RESET_CODE_TOO_BUSY; + + /* set the `Ack Pending' flag to piggyback a Confirm */ + inet_csk_schedule_ack(sk); + + } else if (opt == DCCPO_CONFIRM_R) { + entry = dccp_feat_list_lookup(fn, feat, local); + if (entry == NULL || entry->state != FEAT_CHANGING) + return 0; + + fval.nn = dccp_decode_value_var(val, len); + /* + * Just ignore a value that doesn't match our current value. + * If the option changes twice within two RTTs, then at least + * one CONFIRM will be received for the old value after a + * new CHANGE was sent. + */ + if (fval.nn != entry->val.nn) + return 0; + + /* Only activate after receiving the Confirm option (6.6.1). */ + dccp_feat_activate(sk, feat, local, &fval); + + /* It has been confirmed - so remove the entry */ + dccp_feat_list_pop(entry); + + } else { + DCCP_WARN("Received illegal option %u\n", opt); + goto fast_path_failed; + } + return 0; + +fast_path_unknown: + if (!mandatory) + return dccp_push_empty_confirm(fn, feat, local); + +fast_path_failed: + return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR + : DCCP_RESET_CODE_OPTION_ERROR; +} + +/** * dccp_feat_parse_options - Process Feature-Negotiation Options * @sk: for general use and used by the client during connection setup * @dreq: used by the server during connection setup @@ -1221,6 +1405,14 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, return dccp_feat_confirm_recv(fn, mandatory, opt, feat, val, len, server); } + break; + /* + * Support for exchanging NN options on an established connection. + */ + case DCCP_OPEN: + case DCCP_PARTOPEN: + return dccp_feat_handle_nn_established(sk, mandatory, opt, feat, + val, len); } return 0; /* ignore FN options in all other states */ } diff --git a/net/dccp/feat.h b/net/dccp/feat.h index e56a4e5e634e..90b957d34d26 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -129,6 +129,7 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); +extern u64 dccp_feat_nn_get(struct sock *sk, u8 feat); extern int dccp_insert_option_mandatory(struct sk_buff *skb); extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 332639b56f4d..1c67fe8ff90d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -111,6 +111,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { + err = PTR_ERR(rt); rt = NULL; goto failure; } @@ -433,6 +434,7 @@ exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: + bh_unlock_sock(newsk); sock_put(newsk); goto exit; } @@ -472,10 +474,11 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; + const struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { .flowi4_oif = skb_rtable(skb)->rt_iif, - .daddr = ip_hdr(skb)->saddr, - .saddr = ip_hdr(skb)->daddr, + .daddr = iph->saddr, + .saddr = iph->daddr, .flowi4_tos = RT_CONN_FLAGS(sk), .flowi4_proto = sk->sk_protocol, .fl4_sport = dccp_hdr(skb)->dccph_dport, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b74f76117dcf..ce903f747e64 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -150,8 +150,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, */ memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl6.daddr, &np->daddr); - ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.daddr = np->daddr; + fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; @@ -244,8 +244,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); - ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.daddr = ireq6->rmt_addr; + fl6.saddr = ireq6->loc_addr; fl6.flowlabel = 0; fl6.flowi6_oif = ireq6->iif; fl6.fl6_dport = inet_rsk(req)->rmt_port; @@ -270,8 +270,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, dh->dccph_checksum = dccp_v6_csum_finish(skb, &ireq6->loc_addr, &ireq6->rmt_addr); - ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); - err = ip6_xmit(sk, skb, &fl6, opt); + fl6.daddr = ireq6->rmt_addr; + err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); err = net_xmit_eval(err); } @@ -313,8 +313,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) &rxip6h->daddr); memset(&fl6, 0, sizeof(fl6)); - ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr); - ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr); + fl6.daddr = rxip6h->saddr; + fl6.saddr = rxip6h->daddr; fl6.flowi6_proto = IPPROTO_DCCP; fl6.flowi6_oif = inet6_iif(rxskb); @@ -326,7 +326,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl6, NULL); + ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); return; @@ -419,8 +419,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; ireq6 = inet6_rsk(req); - ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); + ireq6->rmt_addr = ipv6_hdr(skb)->saddr; + ireq6->loc_addr = ipv6_hdr(skb)->daddr; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || @@ -491,7 +491,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); + newnp->rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; @@ -526,9 +526,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + fl6.daddr = ireq6->rmt_addr; final_p = fl6_update_dst(&fl6, opt, &final); - ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.saddr = ireq6->loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; @@ -559,9 +559,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr); - ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr); - ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr); + newnp->daddr = ireq6->rmt_addr; + newnp->saddr = ireq6->loc_addr; + newnp->rcv_saddr = ireq6->loc_addr; newsk->sk_bound_dev_if = ireq6->iif; /* Now IPv6 options... @@ -877,7 +877,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); + usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } @@ -910,7 +910,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, return -EINVAL; } - ipv6_addr_copy(&np->daddr, &usin->sin6_addr); + np->daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* @@ -949,8 +949,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, saddr = &np->rcv_saddr; fl6.flowi6_proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl6.daddr, &np->daddr); - ipv6_addr_copy(&fl6.saddr, saddr ? saddr : &np->saddr); + fl6.daddr = np->daddr; + fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; @@ -966,11 +966,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (saddr == NULL) { saddr = &fl6.saddr; - ipv6_addr_copy(&np->rcv_saddr, saddr); + np->rcv_saddr = *saddr; } /* set the source address */ - ipv6_addr_copy(&np->saddr, saddr); + np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; __ip6_dst_store(sk, dst, NULL, NULL); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index d7041a0963af..5a7f90bbffac 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -53,15 +53,15 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (tw != NULL) { const struct inet_connection_sock *icsk = inet_csk(sk); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); struct inet6_timewait_sock *tw6; tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); tw6 = inet6_twsk((struct sock *)tw); - ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); + tw6->tw_v6_daddr = np->daddr; + tw6->tw_v6_rcv_saddr = np->rcv_saddr; tw->tw_ipv6only = np->ipv6only; } #endif @@ -100,7 +100,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, * (* Generate a new socket and switch to that socket *) * Set S := new socket for this port pair */ - struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); + struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); if (newsk != NULL) { struct dccp_request_sock *dreq = dccp_rsk(req); diff --git a/net/dccp/options.c b/net/dccp/options.c index 4b2ab657ac8e..68fa6b7a3e01 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -544,7 +544,7 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, } if (unlikely(val == NULL || len == 0)) - len = repeat_first = 0; + len = repeat_first = false; tot_len = 3 + repeat_first + len; if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) { diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 33d0e6297c21..0a8d6ebd9b45 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -152,6 +152,17 @@ static const struct file_operations dccpprobe_fops = { .llseek = noop_llseek, }; +static __init int setup_jprobe(void) +{ + int ret = register_jprobe(&dccp_send_probe); + + if (ret) { + request_module("dccp"); + ret = register_jprobe(&dccp_send_probe); + } + return ret; +} + static __init int dccpprobe_init(void) { int ret = -ENOMEM; @@ -163,8 +174,7 @@ static __init int dccpprobe_init(void) if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; - try_then_request_module((ret = register_jprobe(&dccp_send_probe)) == 0, - "dccp"); + ret = setup_jprobe(); if (ret) goto err1; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 152975d942d9..7065c0ae1e7b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -184,7 +184,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; - dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); @@ -1100,7 +1099,7 @@ module_param(thash_entries, int, 0444); MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG -int dccp_debug; +bool dccp_debug; module_param(dccp_debug, bool, 0644); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 7587870b7040..16f0b223102e 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -12,6 +12,7 @@ #include <linux/dccp.h> #include <linux/skbuff.h> +#include <linux/export.h> #include "dccp.h" diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index ba4faceec405..74d321a60e7b 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1101,7 +1101,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); if (!dn_db->neigh_parms) { - rcu_assign_pointer(dev->dn_ptr, NULL); + RCU_INIT_POINTER(dev->dn_ptr, NULL); kfree(dn_db); return NULL; } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 7f0eb087dc11..befe426491ba 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -88,9 +88,9 @@ static const struct neigh_ops dn_phase3_ops = { static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev, - __u32 hash_rnd) + __u32 *hash_rnd) { - return jhash_2words(*(__u16 *)pkey, 0, hash_rnd); + return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]); } struct neigh_table dn_neigh_table = { @@ -107,7 +107,7 @@ struct neigh_table dn_neigh_table = { .gc_staletime = 60 * HZ, .reachable_time = 30 * HZ, .delay_probe_time = 5 * HZ, - .queue_len = 3, + .queue_len_bytes = 64*1024, .ucast_probes = 0, .app_probes = 0, .mcast_probes = 0, @@ -202,7 +202,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = dst_get_neighbour(dst); + struct neighbour *neigh = dst_get_neighbour_noref(dst); struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 43450c100226..f31ce72dca65 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -77,6 +77,7 @@ #include <linux/netfilter_decnet.h> #include <linux/rcupdate.h> #include <linux/times.h> +#include <linux/export.h> #include <asm/errno.h> #include <net/net_namespace.h> #include <net/netlink.h> @@ -111,7 +112,7 @@ static unsigned long dn_rt_deadline; static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); -static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); +static unsigned int dn_dst_mtu(const struct dst_entry *dst); static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); @@ -134,7 +135,7 @@ static struct dst_ops dn_dst_ops = { .gc = dn_dst_gc, .check = dn_dst_check, .default_advmss = dn_dst_default_advmss, - .default_mtu = dn_dst_default_mtu, + .mtu = dn_dst_mtu, .cow_metrics = dst_cow_metrics_generic, .destroy = dn_dst_destroy, .negative_advice = dn_dst_negative_advice, @@ -243,7 +244,7 @@ static int dn_dst_gc(struct dst_ops *ops) */ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { - struct neighbour *n = dst_get_neighbour(dst); + struct neighbour *n = dst_get_neighbour_noref(dst); u32 min_mtu = 230; struct dn_dev *dn; @@ -712,7 +713,7 @@ out: static int dn_to_neigh_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *n = dst_get_neighbour(dst); + struct neighbour *n = dst_get_neighbour_noref(dst); return n->output(n, skb); } @@ -727,7 +728,7 @@ static int dn_output(struct sk_buff *skb) int err = -EINVAL; - if ((neigh = dst_get_neighbour(dst)) == NULL) + if ((neigh = dst_get_neighbour_noref(dst)) == NULL) goto error; skb->dev = dev; @@ -824,9 +825,11 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) return dn_mss_from_pmtu(dst->dev, dst_mtu(dst)); } -static unsigned int dn_dst_default_mtu(const struct dst_entry *dst) +static unsigned int dn_dst_mtu(const struct dst_entry *dst) { - return dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) @@ -849,7 +852,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) } rt->rt_type = res->type; - if (dev != NULL && dst_get_neighbour(&rt->dst) == NULL) { + if (dev != NULL && dst_get_neighbour_noref(&rt->dst) == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index f0efb0ccfeca..f65c9ddaee41 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -23,6 +23,7 @@ #include <linux/spinlock.h> #include <linux/list.h> #include <linux/rcupdate.h> +#include <linux/export.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c index 67f691bd4acf..d9c150cc59a9 100644 --- a/net/decnet/dn_timer.c +++ b/net/decnet/dn_timer.c @@ -36,16 +36,13 @@ static void dn_slow_timer(unsigned long arg); void dn_start_slow_timer(struct sock *sk) { - sk->sk_timer.expires = jiffies + SLOW_INTERVAL; - sk->sk_timer.function = dn_slow_timer; - sk->sk_timer.data = (unsigned long)sk; - - add_timer(&sk->sk_timer); + setup_timer(&sk->sk_timer, dn_slow_timer, (unsigned long)sk); + sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); } void dn_stop_slow_timer(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } static void dn_slow_timer(unsigned long arg) @@ -53,12 +50,10 @@ static void dn_slow_timer(unsigned long arg) struct sock *sk = (struct sock *)arg; struct dn_scp *scp = DN_SK(sk); - sock_hold(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk->sk_timer.expires = jiffies + HZ / 10; - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10); goto out; } @@ -100,9 +95,7 @@ static void dn_slow_timer(unsigned long arg) scp->keepalive_fxn(sk); } - sk->sk_timer.expires = jiffies + SLOW_INTERVAL; - - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); out: bh_unlock_sock(sk); sock_put(sk); diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index c53ded2a98df..274791cd7a35 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -1,5 +1,5 @@ -menuconfig NET_DSA - bool "Distributed Switch Architecture support" +config NET_DSA + tristate "Distributed Switch Architecture support" default n depends on EXPERIMENTAL && NETDEVICES && !S390 select PHYLIB @@ -23,38 +23,4 @@ config NET_DSA_TAG_TRAILER bool default n - -# switch drivers -config NET_DSA_MV88E6XXX - bool - default n - -config NET_DSA_MV88E6060 - bool "Marvell 88E6060 ethernet switch chip support" - select NET_DSA_TAG_TRAILER - ---help--- - This enables support for the Marvell 88E6060 ethernet switch - chip. - -config NET_DSA_MV88E6XXX_NEED_PPU - bool - default n - -config NET_DSA_MV88E6131 - bool "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support" - select NET_DSA_MV88E6XXX - select NET_DSA_MV88E6XXX_NEED_PPU - select NET_DSA_TAG_DSA - ---help--- - This enables support for the Marvell 88E6085/6095/6095F/6131 - ethernet switch chips. - -config NET_DSA_MV88E6123_61_65 - bool "Marvell 88E6123/6161/6165 ethernet switch chip support" - select NET_DSA_MV88E6XXX - select NET_DSA_TAG_EDSA - ---help--- - This enables support for the Marvell 88E6123/6161/6165 - ethernet switch chips. - endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 2374faff4dea..7b9fcbbeda5d 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,13 +1,8 @@ -# tagging formats -obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o -obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o -obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o - -# switch drivers -obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o -obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o -obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o -obj-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o - # the core -obj-$(CONFIG_NET_DSA) += dsa.o slave.o +obj-$(CONFIG_NET_DSA) += dsa_core.o +dsa_core-y += dsa.o slave.o + +# tagging formats +dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o +dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o +dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 3fb14b7c13cf..88e7c2f3fa0d 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -12,6 +12,7 @@ #include <linux/netdevice.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/module.h> #include <net/dsa.h> #include "dsa_priv.h" @@ -28,6 +29,7 @@ void register_switch_driver(struct dsa_switch_driver *drv) list_add_tail(&drv->list, &dsa_switch_drivers); mutex_unlock(&dsa_switch_drivers_mutex); } +EXPORT_SYMBOL_GPL(register_switch_driver); void unregister_switch_driver(struct dsa_switch_driver *drv) { @@ -35,6 +37,7 @@ void unregister_switch_driver(struct dsa_switch_driver *drv) list_del_init(&drv->list); mutex_unlock(&dsa_switch_drivers_mutex); } +EXPORT_SYMBOL_GPL(unregister_switch_driver); static struct dsa_switch_driver * dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) @@ -198,29 +201,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds) } -/* hooks for ethertype-less tagging formats *********************************/ -/* - * The original DSA tag format and some other tag formats have no - * ethertype, which means that we need to add a little hack to the - * networking receive path to make sure that received frames get - * the right ->protocol assigned to them when one of those tag - * formats is in use. - */ -bool dsa_uses_dsa_tags(void *dsa_ptr) -{ - struct dsa_switch_tree *dst = dsa_ptr; - - return !!(dst->tag_protocol == htons(ETH_P_DSA)); -} - -bool dsa_uses_trailer_tags(void *dsa_ptr) -{ - struct dsa_switch_tree *dst = dsa_ptr; - - return !!(dst->tag_protocol == htons(ETH_P_TRAILER)); -} - - /* link polling *************************************************************/ static void dsa_link_poll_work(struct work_struct *ugly) { @@ -418,12 +398,36 @@ static struct platform_driver dsa_driver = { static int __init dsa_init_module(void) { - return platform_driver_register(&dsa_driver); + int rc; + + rc = platform_driver_register(&dsa_driver); + if (rc) + return rc; + +#ifdef CONFIG_NET_DSA_TAG_DSA + dev_add_pack(&dsa_packet_type); +#endif +#ifdef CONFIG_NET_DSA_TAG_EDSA + dev_add_pack(&edsa_packet_type); +#endif +#ifdef CONFIG_NET_DSA_TAG_TRAILER + dev_add_pack(&trailer_packet_type); +#endif + return 0; } module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { +#ifdef CONFIG_NET_DSA_TAG_TRAILER + dev_remove_pack(&trailer_packet_type); +#endif +#ifdef CONFIG_NET_DSA_TAG_EDSA + dev_remove_pack(&edsa_packet_type); +#endif +#ifdef CONFIG_NET_DSA_TAG_DSA + dev_remove_pack(&dsa_packet_type); +#endif platform_driver_unregister(&dsa_driver); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 4b0ea0540442..d4cf5cc747e3 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -11,97 +11,9 @@ #ifndef __DSA_PRIV_H #define __DSA_PRIV_H -#include <linux/list.h> #include <linux/phy.h> -#include <linux/timer.h> -#include <linux/workqueue.h> #include <net/dsa.h> -struct dsa_switch { - /* - * Parent switch tree, and switch index. - */ - struct dsa_switch_tree *dst; - int index; - - /* - * Configuration data for this switch. - */ - struct dsa_chip_data *pd; - - /* - * The used switch driver. - */ - struct dsa_switch_driver *drv; - - /* - * Reference to mii bus to use. - */ - struct mii_bus *master_mii_bus; - - /* - * Slave mii_bus and devices for the individual ports. - */ - u32 dsa_port_mask; - u32 phys_port_mask; - struct mii_bus *slave_mii_bus; - struct net_device *ports[DSA_MAX_PORTS]; -}; - -struct dsa_switch_tree { - /* - * Configuration data for the platform device that owns - * this dsa switch tree instance. - */ - struct dsa_platform_data *pd; - - /* - * Reference to network device to use, and which tagging - * protocol to use. - */ - struct net_device *master_netdev; - __be16 tag_protocol; - - /* - * The switch and port to which the CPU is attached. - */ - s8 cpu_switch; - s8 cpu_port; - - /* - * Link state polling. - */ - int link_poll_needed; - struct work_struct link_poll_work; - struct timer_list link_poll_timer; - - /* - * Data for the individual switch chips. - */ - struct dsa_switch *ds[DSA_MAX_SWITCHES]; -}; - -static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) -{ - return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port); -} - -static inline u8 dsa_upstream_port(struct dsa_switch *ds) -{ - struct dsa_switch_tree *dst = ds->dst; - - /* - * If this is the root switch (i.e. the switch that connects - * to the CPU), return the cpu port number on this switch. - * Else return the (DSA) port number that connects to the - * switch that is one hop closer to the cpu. - */ - if (dst->cpu_switch == ds->index) - return dst->cpu_port; - else - return ds->pd->rtable[dst->cpu_switch]; -} - struct dsa_slave_priv { /* * The linux network interface corresponding to this @@ -123,44 +35,8 @@ struct dsa_slave_priv { struct phy_device *phy; }; -struct dsa_switch_driver { - struct list_head list; - - __be16 tag_protocol; - int priv_size; - - /* - * Probing and setup. - */ - char *(*probe)(struct mii_bus *bus, int sw_addr); - int (*setup)(struct dsa_switch *ds); - int (*set_addr)(struct dsa_switch *ds, u8 *addr); - - /* - * Access to the switch's PHY registers. - */ - int (*phy_read)(struct dsa_switch *ds, int port, int regnum); - int (*phy_write)(struct dsa_switch *ds, int port, - int regnum, u16 val); - - /* - * Link state polling and IRQ handling. - */ - void (*poll_link)(struct dsa_switch *ds); - - /* - * ethtool hardware statistics. - */ - void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data); - void (*get_ethtool_stats)(struct dsa_switch *ds, - int port, uint64_t *data); - int (*get_sset_count)(struct dsa_switch *ds); -}; - /* dsa.c */ extern char dsa_driver_version[]; -void register_switch_driver(struct dsa_switch_driver *type); -void unregister_switch_driver(struct dsa_switch_driver *type); /* slave.c */ void dsa_slave_mii_bus_init(struct dsa_switch *ds); @@ -170,12 +46,15 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds, /* tag_dsa.c */ netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev); +extern struct packet_type dsa_packet_type; /* tag_edsa.c */ netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev); +extern struct packet_type edsa_packet_type; /* tag_trailer.c */ netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev); +extern struct packet_type trailer_packet_type; #endif diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c deleted file mode 100644 index 8f4ff5a2c813..000000000000 --- a/net/dsa/mv88e6060.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips - * Copyright (c) 2008-2009 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/phy.h> -#include "dsa_priv.h" - -#define REG_PORT(p) (8 + (p)) -#define REG_GLOBAL 0x0f - -static int reg_read(struct dsa_switch *ds, int addr, int reg) -{ - return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg); -} - -#define REG_READ(addr, reg) \ - ({ \ - int __ret; \ - \ - __ret = reg_read(ds, addr, reg); \ - if (__ret < 0) \ - return __ret; \ - __ret; \ - }) - - -static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) -{ - return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr, - reg, val); -} - -#define REG_WRITE(addr, reg, val) \ - ({ \ - int __ret; \ - \ - __ret = reg_write(ds, addr, reg, val); \ - if (__ret < 0) \ - return __ret; \ - }) - -static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr) -{ - int ret; - - ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03); - if (ret >= 0) { - ret &= 0xfff0; - if (ret == 0x0600) - return "Marvell 88E6060"; - } - - return NULL; -} - -static int mv88e6060_switch_reset(struct dsa_switch *ds) -{ - int i; - int ret; - - /* - * Set all ports to the disabled state. - */ - for (i = 0; i < 6; i++) { - ret = REG_READ(REG_PORT(i), 0x04); - REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); - } - - /* - * Wait for transmit queues to drain. - */ - msleep(2); - - /* - * Reset the switch. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0xa130); - - /* - * Wait up to one second for reset to complete. - */ - for (i = 0; i < 1000; i++) { - ret = REG_READ(REG_GLOBAL, 0x00); - if ((ret & 0x8000) == 0x0000) - break; - - msleep(1); - } - if (i == 1000) - return -ETIMEDOUT; - - return 0; -} - -static int mv88e6060_setup_global(struct dsa_switch *ds) -{ - /* - * Disable discarding of frames with excessive collisions, - * set the maximum frame size to 1536 bytes, and mask all - * interrupt sources. - */ - REG_WRITE(REG_GLOBAL, 0x04, 0x0800); - - /* - * Enable automatic address learning, set the address - * database size to 1024 entries, and set the default aging - * time to 5 minutes. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x2130); - - return 0; -} - -static int mv88e6060_setup_port(struct dsa_switch *ds, int p) -{ - int addr = REG_PORT(p); - - /* - * Do not force flow control, disable Ingress and Egress - * Header tagging, disable VLAN tunneling, and set the port - * state to Forwarding. Additionally, if this is the CPU - * port, enable Ingress and Egress Trailer tagging mode. - */ - REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ? 0x4103 : 0x0003); - - /* - * Port based VLAN map: give each port its own address - * database, allow the CPU port to talk to each of the 'real' - * ports, and allow each of the 'real' ports to only talk to - * the CPU port. - */ - REG_WRITE(addr, 0x06, - ((p & 0xf) << 12) | - (dsa_is_cpu_port(ds, p) ? - ds->phys_port_mask : - (1 << ds->dst->cpu_port))); - - /* - * Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - return 0; -} - -static int mv88e6060_setup(struct dsa_switch *ds) -{ - int i; - int ret; - - ret = mv88e6060_switch_reset(ds); - if (ret < 0) - return ret; - - /* @@@ initialise atu */ - - ret = mv88e6060_setup_global(ds); - if (ret < 0) - return ret; - - for (i = 0; i < 6; i++) { - ret = mv88e6060_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; -} - -static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr) -{ - REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]); - REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]); - REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]); - - return 0; -} - -static int mv88e6060_port_to_phy_addr(int port) -{ - if (port >= 0 && port <= 5) - return port; - return -1; -} - -static int mv88e6060_phy_read(struct dsa_switch *ds, int port, int regnum) -{ - int addr; - - addr = mv88e6060_port_to_phy_addr(port); - if (addr == -1) - return 0xffff; - - return reg_read(ds, addr, regnum); -} - -static int -mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) -{ - int addr; - - addr = mv88e6060_port_to_phy_addr(port); - if (addr == -1) - return 0xffff; - - return reg_write(ds, addr, regnum, val); -} - -static void mv88e6060_poll_link(struct dsa_switch *ds) -{ - int i; - - for (i = 0; i < DSA_MAX_PORTS; i++) { - struct net_device *dev; - int uninitialized_var(port_status); - int link; - int speed; - int duplex; - int fc; - - dev = ds->ports[i]; - if (dev == NULL) - continue; - - link = 0; - if (dev->flags & IFF_UP) { - port_status = reg_read(ds, REG_PORT(i), 0x00); - if (port_status < 0) - continue; - - link = !!(port_status & 0x1000); - } - - if (!link) { - if (netif_carrier_ok(dev)) { - printk(KERN_INFO "%s: link down\n", dev->name); - netif_carrier_off(dev); - } - continue; - } - - speed = (port_status & 0x0100) ? 100 : 10; - duplex = (port_status & 0x0200) ? 1 : 0; - fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0; - - if (!netif_carrier_ok(dev)) { - printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " - "flow control %sabled\n", dev->name, - speed, duplex ? "full" : "half", - fc ? "en" : "dis"); - netif_carrier_on(dev); - } - } -} - -static struct dsa_switch_driver mv88e6060_switch_driver = { - .tag_protocol = htons(ETH_P_TRAILER), - .probe = mv88e6060_probe, - .setup = mv88e6060_setup, - .set_addr = mv88e6060_set_addr, - .phy_read = mv88e6060_phy_read, - .phy_write = mv88e6060_phy_write, - .poll_link = mv88e6060_poll_link, -}; - -static int __init mv88e6060_init(void) -{ - register_switch_driver(&mv88e6060_switch_driver); - return 0; -} -module_init(mv88e6060_init); - -static void __exit mv88e6060_cleanup(void) -{ - unregister_switch_driver(&mv88e6060_switch_driver); -} -module_exit(mv88e6060_cleanup); diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c deleted file mode 100644 index 52faaa21a4d9..000000000000 --- a/net/dsa/mv88e6123_61_65.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support - * Copyright (c) 2008-2009 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/phy.h> -#include "dsa_priv.h" -#include "mv88e6xxx.h" - -static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr) -{ - int ret; - - ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); - if (ret >= 0) { - ret &= 0xfff0; - if (ret == 0x1210) - return "Marvell 88E6123"; - if (ret == 0x1610) - return "Marvell 88E6161"; - if (ret == 0x1650) - return "Marvell 88E6165"; - } - - return NULL; -} - -static int mv88e6123_61_65_switch_reset(struct dsa_switch *ds) -{ - int i; - int ret; - - /* - * Set all ports to the disabled state. - */ - for (i = 0; i < 8; i++) { - ret = REG_READ(REG_PORT(i), 0x04); - REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); - } - - /* - * Wait for transmit queues to drain. - */ - msleep(2); - - /* - * Reset the switch. - */ - REG_WRITE(REG_GLOBAL, 0x04, 0xc400); - - /* - * Wait up to one second for reset to complete. - */ - for (i = 0; i < 1000; i++) { - ret = REG_READ(REG_GLOBAL, 0x00); - if ((ret & 0xc800) == 0xc800) - break; - - msleep(1); - } - if (i == 1000) - return -ETIMEDOUT; - - return 0; -} - -static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) -{ - int ret; - int i; - - /* - * Disable the PHY polling unit (since there won't be any - * external PHYs to poll), don't discard packets with - * excessive collisions, and mask all interrupt sources. - */ - REG_WRITE(REG_GLOBAL, 0x04, 0x0000); - - /* - * Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* - * Configure the priority mapping registers. - */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; - - /* - * Configure the upstream port, and configure the upstream - * port as the port to which ingress and egress monitor frames - * are to be sent. - */ - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110)); - - /* - * Disable remote management for now, and set the switch's - * DSA device number. - */ - REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); - - /* - * Send all frames with destination addresses matching - * 01:80:c2:00:00:2x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); - - /* - * Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* - * Disable the loopback filter, disable flow control - * messages, disable flood broadcast override, disable - * removing of provider tags, disable ATU age violation - * interrupts, disable tag flow control, force flow - * control priority to the highest, and send all special - * multicast frames to the CPU at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* - * Program the DSA routing table. - */ - for (i = 0; i < 32; i++) { - int nexthop; - - nexthop = 0x1f; - if (i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* - * Clear all trunk masks. - */ - for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); - - /* - * Clear all trunk mappings. - */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); - - /* - * Disable ingress rate limiting by resetting all ingress - * rate limit registers to their initial state. - */ - for (i = 0; i < 6; i++) - REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); - - /* - * Initialise cross-chip port VLAN table to reset defaults. - */ - REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); - - /* - * Clear the priority override table. - */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); - - /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ - - return 0; -} - -static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) -{ - int addr = REG_PORT(p); - u16 val; - - /* - * MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. - */ - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, 0x003e); - else - REG_WRITE(addr, 0x01, 0x0003); - - /* - * Do not limit the period of time that this port can be - * paused for by the remote end or the period of time that - * this port can pause the remote end. - */ - REG_WRITE(addr, 0x02, 0x0000); - - /* - * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, - * disable Header mode, enable IGMP/MLD snooping, disable VLAN - * tunneling, determine priority by looking at 802.1p and IP - * priority fields (IP prio has precedence), and set STP state - * to Forwarding. - * - * If this is the CPU link, use DSA or EDSA tagging depending - * on which tagging mode was configured. - * - * If this is a link to another switch, use DSA tagging mode. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts and multicasts. - */ - val = 0x0433; - if (dsa_is_cpu_port(ds, p)) { - if (ds->dst->tag_protocol == htons(ETH_P_EDSA)) - val |= 0x3300; - else - val |= 0x0100; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - if (p == dsa_upstream_port(ds)) - val |= 0x000c; - REG_WRITE(addr, 0x04, val); - - /* - * Port Control 1: disable trunking. Also, if this is the - * CPU port, enable learn messages to be sent to this port. - */ - REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000); - - /* - * Port based VLAN map: give each port its own address - * database, allow the CPU port to talk to each of the 'real' - * ports, and allow each of the 'real' ports to only talk to - * the upstream port. - */ - val = (p & 0xf) << 12; - if (dsa_is_cpu_port(ds, p)) - val |= ds->phys_port_mask; - else - val |= 1 << dsa_upstream_port(ds); - REG_WRITE(addr, 0x06, val); - - /* - * Default VLAN ID and priority: don't set a default VLAN - * ID, and set the default packet priority to zero. - */ - REG_WRITE(addr, 0x07, 0x0000); - - /* - * Port Control 2: don't force a good FCS, set the maximum - * frame size to 10240 bytes, don't let the switch add or - * strip 802.1q tags, don't discard tagged or untagged frames - * on this port, do a destination address lookup on all - * received packets as usual, disable ARP mirroring and don't - * send a copy of all transmitted/received frames on this port - * to the CPU. - */ - REG_WRITE(addr, 0x08, 0x2080); - - /* - * Egress rate control: disable egress rate control. - */ - REG_WRITE(addr, 0x09, 0x0001); - - /* - * Egress rate control 2: disable egress rate control. - */ - REG_WRITE(addr, 0x0a, 0x0000); - - /* - * Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* - * Port ATU control: disable limiting the number of address - * database entries that this port is allowed to use. - */ - REG_WRITE(addr, 0x0c, 0x0000); - - /* - * Priorit Override: disable DA, SA and VTU priority override. - */ - REG_WRITE(addr, 0x0d, 0x0000); - - /* - * Port Ethertype: use the Ethertype DSA Ethertype value. - */ - REG_WRITE(addr, 0x0f, ETH_P_EDSA); - - /* - * Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* - * Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x19, 0x7654); - - return 0; -} - -static int mv88e6123_61_65_setup(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); - int i; - int ret; - - mutex_init(&ps->smi_mutex); - mutex_init(&ps->stats_mutex); - - ret = mv88e6123_61_65_switch_reset(ds); - if (ret < 0) - return ret; - - /* @@@ initialise vtu and atu */ - - ret = mv88e6123_61_65_setup_global(ds); - if (ret < 0) - return ret; - - for (i = 0; i < 6; i++) { - ret = mv88e6123_61_65_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; -} - -static int mv88e6123_61_65_port_to_phy_addr(int port) -{ - if (port >= 0 && port <= 4) - return port; - return -1; -} - -static int -mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum) -{ - int addr = mv88e6123_61_65_port_to_phy_addr(port); - return mv88e6xxx_phy_read(ds, addr, regnum); -} - -static int -mv88e6123_61_65_phy_write(struct dsa_switch *ds, - int port, int regnum, u16 val) -{ - int addr = mv88e6123_61_65_port_to_phy_addr(port); - return mv88e6xxx_phy_write(ds, addr, regnum, val); -} - -static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = { - { "in_good_octets", 8, 0x00, }, - { "in_bad_octets", 4, 0x02, }, - { "in_unicast", 4, 0x04, }, - { "in_broadcasts", 4, 0x06, }, - { "in_multicasts", 4, 0x07, }, - { "in_pause", 4, 0x16, }, - { "in_undersize", 4, 0x18, }, - { "in_fragments", 4, 0x19, }, - { "in_oversize", 4, 0x1a, }, - { "in_jabber", 4, 0x1b, }, - { "in_rx_error", 4, 0x1c, }, - { "in_fcs_error", 4, 0x1d, }, - { "out_octets", 8, 0x0e, }, - { "out_unicast", 4, 0x10, }, - { "out_broadcasts", 4, 0x13, }, - { "out_multicasts", 4, 0x12, }, - { "out_pause", 4, 0x15, }, - { "excessive", 4, 0x11, }, - { "collisions", 4, 0x1e, }, - { "deferred", 4, 0x05, }, - { "single", 4, 0x14, }, - { "multiple", 4, 0x17, }, - { "out_fcs_error", 4, 0x03, }, - { "late", 4, 0x1f, }, - { "hist_64bytes", 4, 0x08, }, - { "hist_65_127bytes", 4, 0x09, }, - { "hist_128_255bytes", 4, 0x0a, }, - { "hist_256_511bytes", 4, 0x0b, }, - { "hist_512_1023bytes", 4, 0x0c, }, - { "hist_1024_max_bytes", 4, 0x0d, }, -}; - -static void -mv88e6123_61_65_get_strings(struct dsa_switch *ds, int port, uint8_t *data) -{ - mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), - mv88e6123_61_65_hw_stats, port, data); -} - -static void -mv88e6123_61_65_get_ethtool_stats(struct dsa_switch *ds, - int port, uint64_t *data) -{ - mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), - mv88e6123_61_65_hw_stats, port, data); -} - -static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds) -{ - return ARRAY_SIZE(mv88e6123_61_65_hw_stats); -} - -static struct dsa_switch_driver mv88e6123_61_65_switch_driver = { - .tag_protocol = cpu_to_be16(ETH_P_EDSA), - .priv_size = sizeof(struct mv88e6xxx_priv_state), - .probe = mv88e6123_61_65_probe, - .setup = mv88e6123_61_65_setup, - .set_addr = mv88e6xxx_set_addr_indirect, - .phy_read = mv88e6123_61_65_phy_read, - .phy_write = mv88e6123_61_65_phy_write, - .poll_link = mv88e6xxx_poll_link, - .get_strings = mv88e6123_61_65_get_strings, - .get_ethtool_stats = mv88e6123_61_65_get_ethtool_stats, - .get_sset_count = mv88e6123_61_65_get_sset_count, -}; - -static int __init mv88e6123_61_65_init(void) -{ - register_switch_driver(&mv88e6123_61_65_switch_driver); - return 0; -} -module_init(mv88e6123_61_65_init); - -static void __exit mv88e6123_61_65_cleanup(void) -{ - unregister_switch_driver(&mv88e6123_61_65_switch_driver); -} -module_exit(mv88e6123_61_65_cleanup); diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c deleted file mode 100644 index 9bd1061fa4ee..000000000000 --- a/net/dsa/mv88e6131.c +++ /dev/null @@ -1,443 +0,0 @@ -/* - * net/dsa/mv88e6131.c - Marvell 88e6095/6095f/6131 switch chip support - * Copyright (c) 2008-2009 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/phy.h> -#include "dsa_priv.h" -#include "mv88e6xxx.h" - -/* - * Switch product IDs - */ -#define ID_6085 0x04a0 -#define ID_6095 0x0950 -#define ID_6131 0x1060 - -static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) -{ - int ret; - - ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); - if (ret >= 0) { - ret &= 0xfff0; - if (ret == ID_6085) - return "Marvell 88E6085"; - if (ret == ID_6095) - return "Marvell 88E6095/88E6095F"; - if (ret == ID_6131) - return "Marvell 88E6131"; - } - - return NULL; -} - -static int mv88e6131_switch_reset(struct dsa_switch *ds) -{ - int i; - int ret; - - /* - * Set all ports to the disabled state. - */ - for (i = 0; i < 11; i++) { - ret = REG_READ(REG_PORT(i), 0x04); - REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); - } - - /* - * Wait for transmit queues to drain. - */ - msleep(2); - - /* - * Reset the switch. - */ - REG_WRITE(REG_GLOBAL, 0x04, 0xc400); - - /* - * Wait up to one second for reset to complete. - */ - for (i = 0; i < 1000; i++) { - ret = REG_READ(REG_GLOBAL, 0x00); - if ((ret & 0xc800) == 0xc800) - break; - - msleep(1); - } - if (i == 1000) - return -ETIMEDOUT; - - return 0; -} - -static int mv88e6131_setup_global(struct dsa_switch *ds) -{ - int ret; - int i; - - /* - * Enable the PHY polling unit, don't discard packets with - * excessive collisions, use a weighted fair queueing scheme - * to arbitrate between packet queues, set the maximum frame - * size to 1632, and mask all interrupt sources. - */ - REG_WRITE(REG_GLOBAL, 0x04, 0x4400); - - /* - * Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* - * Configure the priority mapping registers. - */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; - - /* - * Set the VLAN ethertype to 0x8100. - */ - REG_WRITE(REG_GLOBAL, 0x19, 0x8100); - - /* - * Disable ARP mirroring, and configure the upstream port as - * the port to which ingress and egress monitor frames are to - * be sent. - */ - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0); - - /* - * Disable cascade port functionality unless this device - * is used in a cascade configuration, and set the switch's - * DSA device number. - */ - if (ds->dst->pd->nr_chips > 1) - REG_WRITE(REG_GLOBAL, 0x1c, 0xf000 | (ds->index & 0x1f)); - else - REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f)); - - /* - * Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* - * Ignore removed tag data on doubly tagged packets, disable - * flow control messages, force flow control priority to the - * highest, and send all special multicast frames to the CPU - * port at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* - * Program the DSA routing table. - */ - for (i = 0; i < 32; i++) { - int nexthop; - - nexthop = 0x1f; - if (i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* - * Clear all trunk masks. - */ - for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7ff); - - /* - * Clear all trunk mappings. - */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); - - /* - * Force the priority of IGMP/MLD snoop frames and ARP frames - * to the highest setting. - */ - REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff); - - return 0; -} - -static int mv88e6131_setup_port(struct dsa_switch *ds, int p) -{ - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); - int addr = REG_PORT(p); - u16 val; - - /* - * MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * (100 Mb/s on 6085) full duplex. - */ - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - if (ps->id == ID_6085) - REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */ - else - REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */ - else - REG_WRITE(addr, 0x01, 0x0003); - - /* - * Port Control: disable Core Tag, disable Drop-on-Lock, - * transmit frames unmodified, disable Header mode, - * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN - * tunneling, determine priority by looking at 802.1p and - * IP priority fields (IP prio has precedence), and set STP - * state to Forwarding. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts, and enable DSA tagging - * mode. - * - * If this is the link to another switch, use DSA tagging - * mode, but do not enable forwarding of unknown unicasts. - */ - val = 0x0433; - if (p == dsa_upstream_port(ds)) { - val |= 0x0104; - /* - * On 6085, unknown multicast forward is controlled - * here rather than in Port Control 2 register. - */ - if (ps->id == ID_6085) - val |= 0x0008; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - REG_WRITE(addr, 0x04, val); - - /* - * Port Control 1: disable trunking. Also, if this is the - * CPU port, enable learn messages to be sent to this port. - */ - REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000); - - /* - * Port based VLAN map: give each port its own address - * database, allow the CPU port to talk to each of the 'real' - * ports, and allow each of the 'real' ports to only talk to - * the upstream port. - */ - val = (p & 0xf) << 12; - if (dsa_is_cpu_port(ds, p)) - val |= ds->phys_port_mask; - else - val |= 1 << dsa_upstream_port(ds); - REG_WRITE(addr, 0x06, val); - - /* - * Default VLAN ID and priority: don't set a default VLAN - * ID, and set the default packet priority to zero. - */ - REG_WRITE(addr, 0x07, 0x0000); - - /* - * Port Control 2: don't force a good FCS, don't use - * VLAN-based, source address-based or destination - * address-based priority overrides, don't let the switch - * add or strip 802.1q tags, don't discard tagged or - * untagged frames on this port, do a destination address - * lookup on received packets as usual, don't send a copy - * of all transmitted/received frames on this port to the - * CPU, and configure the upstream port number. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown multicast addresses. - */ - if (ps->id == ID_6085) - /* - * on 6085, bits 3:0 are reserved, bit 6 control ARP - * mirroring, and multicast forward is handled in - * Port Control register. - */ - REG_WRITE(addr, 0x08, 0x0080); - else { - val = 0x0080 | dsa_upstream_port(ds); - if (p == dsa_upstream_port(ds)) - val |= 0x0040; - REG_WRITE(addr, 0x08, val); - } - - /* - * Rate Control: disable ingress rate limiting. - */ - REG_WRITE(addr, 0x09, 0x0000); - - /* - * Rate Control 2: disable egress rate limiting. - */ - REG_WRITE(addr, 0x0a, 0x0000); - - /* - * Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* - * Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* - * Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x19, 0x7654); - - return 0; -} - -static int mv88e6131_setup(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); - int i; - int ret; - - mutex_init(&ps->smi_mutex); - mv88e6xxx_ppu_state_init(ds); - mutex_init(&ps->stats_mutex); - - ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0; - - ret = mv88e6131_switch_reset(ds); - if (ret < 0) - return ret; - - /* @@@ initialise vtu and atu */ - - ret = mv88e6131_setup_global(ds); - if (ret < 0) - return ret; - - for (i = 0; i < 11; i++) { - ret = mv88e6131_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; -} - -static int mv88e6131_port_to_phy_addr(int port) -{ - if (port >= 0 && port <= 11) - return port; - return -1; -} - -static int -mv88e6131_phy_read(struct dsa_switch *ds, int port, int regnum) -{ - int addr = mv88e6131_port_to_phy_addr(port); - return mv88e6xxx_phy_read_ppu(ds, addr, regnum); -} - -static int -mv88e6131_phy_write(struct dsa_switch *ds, - int port, int regnum, u16 val) -{ - int addr = mv88e6131_port_to_phy_addr(port); - return mv88e6xxx_phy_write_ppu(ds, addr, regnum, val); -} - -static struct mv88e6xxx_hw_stat mv88e6131_hw_stats[] = { - { "in_good_octets", 8, 0x00, }, - { "in_bad_octets", 4, 0x02, }, - { "in_unicast", 4, 0x04, }, - { "in_broadcasts", 4, 0x06, }, - { "in_multicasts", 4, 0x07, }, - { "in_pause", 4, 0x16, }, - { "in_undersize", 4, 0x18, }, - { "in_fragments", 4, 0x19, }, - { "in_oversize", 4, 0x1a, }, - { "in_jabber", 4, 0x1b, }, - { "in_rx_error", 4, 0x1c, }, - { "in_fcs_error", 4, 0x1d, }, - { "out_octets", 8, 0x0e, }, - { "out_unicast", 4, 0x10, }, - { "out_broadcasts", 4, 0x13, }, - { "out_multicasts", 4, 0x12, }, - { "out_pause", 4, 0x15, }, - { "excessive", 4, 0x11, }, - { "collisions", 4, 0x1e, }, - { "deferred", 4, 0x05, }, - { "single", 4, 0x14, }, - { "multiple", 4, 0x17, }, - { "out_fcs_error", 4, 0x03, }, - { "late", 4, 0x1f, }, - { "hist_64bytes", 4, 0x08, }, - { "hist_65_127bytes", 4, 0x09, }, - { "hist_128_255bytes", 4, 0x0a, }, - { "hist_256_511bytes", 4, 0x0b, }, - { "hist_512_1023bytes", 4, 0x0c, }, - { "hist_1024_max_bytes", 4, 0x0d, }, -}; - -static void -mv88e6131_get_strings(struct dsa_switch *ds, int port, uint8_t *data) -{ - mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6131_hw_stats), - mv88e6131_hw_stats, port, data); -} - -static void -mv88e6131_get_ethtool_stats(struct dsa_switch *ds, - int port, uint64_t *data) -{ - mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6131_hw_stats), - mv88e6131_hw_stats, port, data); -} - -static int mv88e6131_get_sset_count(struct dsa_switch *ds) -{ - return ARRAY_SIZE(mv88e6131_hw_stats); -} - -static struct dsa_switch_driver mv88e6131_switch_driver = { - .tag_protocol = cpu_to_be16(ETH_P_DSA), - .priv_size = sizeof(struct mv88e6xxx_priv_state), - .probe = mv88e6131_probe, - .setup = mv88e6131_setup, - .set_addr = mv88e6xxx_set_addr_direct, - .phy_read = mv88e6131_phy_read, - .phy_write = mv88e6131_phy_write, - .poll_link = mv88e6xxx_poll_link, - .get_strings = mv88e6131_get_strings, - .get_ethtool_stats = mv88e6131_get_ethtool_stats, - .get_sset_count = mv88e6131_get_sset_count, -}; - -static int __init mv88e6131_init(void) -{ - register_switch_driver(&mv88e6131_switch_driver); - return 0; -} -module_init(mv88e6131_init); - -static void __exit mv88e6131_cleanup(void) -{ - unregister_switch_driver(&mv88e6131_switch_driver); -} -module_exit(mv88e6131_cleanup); diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c deleted file mode 100644 index efe661a9def4..000000000000 --- a/net/dsa/mv88e6xxx.c +++ /dev/null @@ -1,522 +0,0 @@ -/* - * net/dsa/mv88e6xxx.c - Marvell 88e6xxx switch chip support - * Copyright (c) 2008 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/list.h> -#include <linux/netdevice.h> -#include <linux/phy.h> -#include "dsa_priv.h" -#include "mv88e6xxx.h" - -/* - * If the switch's ADDR[4:0] strap pins are strapped to zero, it will - * use all 32 SMI bus addresses on its SMI bus, and all switch registers - * will be directly accessible on some {device address,register address} - * pair. If the ADDR[4:0] pins are not strapped to zero, the switch - * will only respond to SMI transactions to that specific address, and - * an indirect addressing mechanism needs to be used to access its - * registers. - */ -static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr) -{ - int ret; - int i; - - for (i = 0; i < 16; i++) { - ret = mdiobus_read(bus, sw_addr, 0); - if (ret < 0) - return ret; - - if ((ret & 0x8000) == 0) - return 0; - } - - return -ETIMEDOUT; -} - -int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) -{ - int ret; - - if (sw_addr == 0) - return mdiobus_read(bus, addr, reg); - - /* - * Wait for the bus to become free. - */ - ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); - if (ret < 0) - return ret; - - /* - * Transmit the read command. - */ - ret = mdiobus_write(bus, sw_addr, 0, 0x9800 | (addr << 5) | reg); - if (ret < 0) - return ret; - - /* - * Wait for the read command to complete. - */ - ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); - if (ret < 0) - return ret; - - /* - * Read the data. - */ - ret = mdiobus_read(bus, sw_addr, 1); - if (ret < 0) - return ret; - - return ret & 0xffff; -} - -int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) -{ - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); - int ret; - - mutex_lock(&ps->smi_mutex); - ret = __mv88e6xxx_reg_read(ds->master_mii_bus, - ds->pd->sw_addr, addr, reg); - mutex_unlock(&ps->smi_mutex); - - return ret; -} - -int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, - int reg, u16 val) -{ - int ret; - - if (sw_addr == 0) - return mdiobus_write(bus, addr, reg, val); - - /* - * Wait for the bus to become free. - */ - ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); - if (ret < 0) - return ret; - - /* - * Transmit the data to write. - */ - ret = mdiobus_write(bus, sw_addr, 1, val); - if (ret < 0) - return ret; - - /* - * Transmit the write command. - */ - ret = mdiobus_write(bus, sw_addr, 0, 0x9400 | (addr << 5) | reg); - if (ret < 0) - return ret; - - /* - * Wait for the write command to complete. - */ - ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); - if (ret < 0) - return ret; - - return 0; -} - -int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) -{ - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); - int ret; - - mutex_lock(&ps->smi_mutex); - ret = __mv88e6xxx_reg_write(ds->master_mii_bus, - ds->pd->sw_addr, addr, reg, val); - mutex_unlock(&ps->smi_mutex); - - return ret; -} - -int mv88e6xxx_config_prio(struct dsa_switch *ds) -{ - /* - * Configure the IP ToS mapping registers. - */ - REG_WRITE(REG_GLOBAL, 0x10, 0x0000); - REG_WRITE(REG_GLOBAL, 0x11, 0x0000); - REG_WRITE(REG_GLOBAL, 0x12, 0x5555); - REG_WRITE(REG_GLOBAL, 0x13, 0x5555); - REG_WRITE(REG_GLOBAL, 0x14, 0xaaaa); - REG_WRITE(REG_GLOBAL, 0x15, 0xaaaa); - REG_WRITE(REG_GLOBAL, 0x16, 0xffff); - REG_WRITE(REG_GLOBAL, 0x17, 0xffff); - - /* - * Configure the IEEE 802.1p priority mapping register. - */ - REG_WRITE(REG_GLOBAL, 0x18, 0xfa41); - - return 0; -} - -int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) -{ - REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]); - REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]); - REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]); - - return 0; -} - -int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) -{ - int i; - int ret; - - for (i = 0; i < 6; i++) { - int j; - - /* - * Write the MAC address byte. - */ - REG_WRITE(REG_GLOBAL2, 0x0d, 0x8000 | (i << 8) | addr[i]); - - /* - * Wait for the write to complete. - */ - for (j = 0; j < 16; j++) { - ret = REG_READ(REG_GLOBAL2, 0x0d); - if ((ret & 0x8000) == 0) - break; - } - if (j == 16) - return -ETIMEDOUT; - } - - return 0; -} - -int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum) -{ - if (addr >= 0) - return mv88e6xxx_reg_read(ds, addr, regnum); - return 0xffff; -} - -int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val) -{ - if (addr >= 0) - return mv88e6xxx_reg_write(ds, addr, regnum, val); - return 0; -} - -#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU -static int mv88e6xxx_ppu_disable(struct dsa_switch *ds) -{ - int ret; - int i; - - ret = REG_READ(REG_GLOBAL, 0x04); - REG_WRITE(REG_GLOBAL, 0x04, ret & ~0x4000); - - for (i = 0; i < 1000; i++) { - ret = REG_READ(REG_GLOBAL, 0x00); - msleep(1); - if ((ret & 0xc000) != 0xc000) - return 0; - } - - return -ETIMEDOUT; -} - -static int mv88e6xxx_ppu_enable(struct dsa_switch *ds) -{ - int ret; - int i; - - ret = REG_READ(REG_GLOBAL, 0x04); - REG_WRITE(REG_GLOBAL, 0x04, ret | 0x4000); - - for (i = 0; i < 1000; i++) { - ret = REG_READ(REG_GLOBAL, 0x00); - msleep(1); - if ((ret & 0xc000) == 0xc000) - return 0; - } - - return -ETIMEDOUT; -} - -static void mv88e6xxx_ppu_reenable_work(struct work_struct *ugly) -{ - struct mv88e6xxx_priv_state *ps; - - ps = container_of(ugly, struct mv88e6xxx_priv_state, ppu_work); - if (mutex_trylock(&ps->ppu_mutex)) { - struct dsa_switch *ds = ((struct dsa_switch *)ps) - 1; - - if (mv88e6xxx_ppu_enable(ds) == 0) - ps->ppu_disabled = 0; - mutex_unlock(&ps->ppu_mutex); - } -} - -static void mv88e6xxx_ppu_reenable_timer(unsigned long _ps) -{ - struct mv88e6xxx_priv_state *ps = (void *)_ps; - - schedule_work(&ps->ppu_work); -} - -static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); - int ret; - - mutex_lock(&ps->ppu_mutex); - - /* - * If the PHY polling unit is enabled, disable it so that - * we can access the PHY registers. If it was already - * disabled, cancel the timer that is going to re-enable - * it. - */ - if (!ps->ppu_disabled) { - ret = mv88e6xxx_ppu_disable(ds); - if (ret < 0) { - mutex_unlock(&ps->ppu_mutex); - return ret; - } - ps->ppu_disabled = 1; - } else { - del_timer(&ps->ppu_timer); - ret = 0; - } - - return ret; -} - -static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); - - /* - * Schedule a timer to re-enable the PHY polling unit. - */ - mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10)); - mutex_unlock(&ps->ppu_mutex); -} - -void mv88e6xxx_ppu_state_init(struct dsa_switch *ds) -{ - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); - - mutex_init(&ps->ppu_mutex); - INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work); - init_timer(&ps->ppu_timer); - ps->ppu_timer.data = (unsigned long)ps; - ps->ppu_timer.function = mv88e6xxx_ppu_reenable_timer; -} - -int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum) -{ - int ret; - - ret = mv88e6xxx_ppu_access_get(ds); - if (ret >= 0) { - ret = mv88e6xxx_reg_read(ds, addr, regnum); - mv88e6xxx_ppu_access_put(ds); - } - - return ret; -} - -int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, - int regnum, u16 val) -{ - int ret; - - ret = mv88e6xxx_ppu_access_get(ds); - if (ret >= 0) { - ret = mv88e6xxx_reg_write(ds, addr, regnum, val); - mv88e6xxx_ppu_access_put(ds); - } - - return ret; -} -#endif - -void mv88e6xxx_poll_link(struct dsa_switch *ds) -{ - int i; - - for (i = 0; i < DSA_MAX_PORTS; i++) { - struct net_device *dev; - int uninitialized_var(port_status); - int link; - int speed; - int duplex; - int fc; - - dev = ds->ports[i]; - if (dev == NULL) - continue; - - link = 0; - if (dev->flags & IFF_UP) { - port_status = mv88e6xxx_reg_read(ds, REG_PORT(i), 0x00); - if (port_status < 0) - continue; - - link = !!(port_status & 0x0800); - } - - if (!link) { - if (netif_carrier_ok(dev)) { - printk(KERN_INFO "%s: link down\n", dev->name); - netif_carrier_off(dev); - } - continue; - } - - switch (port_status & 0x0300) { - case 0x0000: - speed = 10; - break; - case 0x0100: - speed = 100; - break; - case 0x0200: - speed = 1000; - break; - default: - speed = -1; - break; - } - duplex = (port_status & 0x0400) ? 1 : 0; - fc = (port_status & 0x8000) ? 1 : 0; - - if (!netif_carrier_ok(dev)) { - printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " - "flow control %sabled\n", dev->name, - speed, duplex ? "full" : "half", - fc ? "en" : "dis"); - netif_carrier_on(dev); - } - } -} - -static int mv88e6xxx_stats_wait(struct dsa_switch *ds) -{ - int ret; - int i; - - for (i = 0; i < 10; i++) { - ret = REG_READ(REG_GLOBAL, 0x1d); - if ((ret & 0x8000) == 0) - return 0; - } - - return -ETIMEDOUT; -} - -static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) -{ - int ret; - - /* - * Snapshot the hardware statistics counters for this port. - */ - REG_WRITE(REG_GLOBAL, 0x1d, 0xdc00 | port); - - /* - * Wait for the snapshotting to complete. - */ - ret = mv88e6xxx_stats_wait(ds); - if (ret < 0) - return ret; - - return 0; -} - -static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) -{ - u32 _val; - int ret; - - *val = 0; - - ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1d, 0xcc00 | stat); - if (ret < 0) - return; - - ret = mv88e6xxx_stats_wait(ds); - if (ret < 0) - return; - - ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1e); - if (ret < 0) - return; - - _val = ret << 16; - - ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1f); - if (ret < 0) - return; - - *val = _val | ret; -} - -void mv88e6xxx_get_strings(struct dsa_switch *ds, - int nr_stats, struct mv88e6xxx_hw_stat *stats, - int port, uint8_t *data) -{ - int i; - - for (i = 0; i < nr_stats; i++) { - memcpy(data + i * ETH_GSTRING_LEN, - stats[i].string, ETH_GSTRING_LEN); - } -} - -void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, - int nr_stats, struct mv88e6xxx_hw_stat *stats, - int port, uint64_t *data) -{ - struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); - int ret; - int i; - - mutex_lock(&ps->stats_mutex); - - ret = mv88e6xxx_stats_snapshot(ds, port); - if (ret < 0) { - mutex_unlock(&ps->stats_mutex); - return; - } - - /* - * Read each of the counters. - */ - for (i = 0; i < nr_stats; i++) { - struct mv88e6xxx_hw_stat *s = stats + i; - u32 low; - u32 high; - - mv88e6xxx_stats_read(ds, s->reg, &low); - if (s->sizeof_stat == 8) - mv88e6xxx_stats_read(ds, s->reg + 1, &high); - else - high = 0; - - data[i] = (((u64)high) << 32) | low; - } - - mutex_unlock(&ps->stats_mutex); -} diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h deleted file mode 100644 index 61156ca26a0d..000000000000 --- a/net/dsa/mv88e6xxx.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * net/dsa/mv88e6xxx.h - Marvell 88e6xxx switch chip support - * Copyright (c) 2008 Marvell Semiconductor - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __MV88E6XXX_H -#define __MV88E6XXX_H - -#define REG_PORT(p) (0x10 + (p)) -#define REG_GLOBAL 0x1b -#define REG_GLOBAL2 0x1c - -struct mv88e6xxx_priv_state { - /* - * When using multi-chip addressing, this mutex protects - * access to the indirect access registers. (In single-chip - * mode, this mutex is effectively useless.) - */ - struct mutex smi_mutex; - -#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU - /* - * Handles automatic disabling and re-enabling of the PHY - * polling unit. - */ - struct mutex ppu_mutex; - int ppu_disabled; - struct work_struct ppu_work; - struct timer_list ppu_timer; -#endif - - /* - * This mutex serialises access to the statistics unit. - * Hold this mutex over snapshot + dump sequences. - */ - struct mutex stats_mutex; - - int id; /* switch product id */ -}; - -struct mv88e6xxx_hw_stat { - char string[ETH_GSTRING_LEN]; - int sizeof_stat; - int reg; -}; - -int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg); -int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg); -int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, - int reg, u16 val); -int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val); -int mv88e6xxx_config_prio(struct dsa_switch *ds); -int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr); -int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr); -int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum); -int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val); -void mv88e6xxx_ppu_state_init(struct dsa_switch *ds); -int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum); -int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, - int regnum, u16 val); -void mv88e6xxx_poll_link(struct dsa_switch *ds); -void mv88e6xxx_get_strings(struct dsa_switch *ds, - int nr_stats, struct mv88e6xxx_hw_stat *stats, - int port, uint8_t *data); -void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, - int nr_stats, struct mv88e6xxx_hw_stat *stats, - int port, uint64_t *data); - -#define REG_READ(addr, reg) \ - ({ \ - int __ret; \ - \ - __ret = mv88e6xxx_reg_read(ds, addr, reg); \ - if (__ret < 0) \ - return __ret; \ - __ret; \ - }) - -#define REG_WRITE(addr, reg, val) \ - ({ \ - int __ret; \ - \ - __ret = mv88e6xxx_reg_write(ds, addr, reg, val); \ - if (__ret < 0) \ - return __ret; \ - }) - - - -#endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0a47b6c37038..56cf9b8e1c7c 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -301,7 +301,6 @@ static const struct net_device_ops dsa_netdev_ops = { .ndo_start_xmit = dsa_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; @@ -314,7 +313,6 @@ static const struct net_device_ops edsa_netdev_ops = { .ndo_start_xmit = edsa_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; @@ -327,7 +325,6 @@ static const struct net_device_ops trailer_netdev_ops = { .ndo_start_xmit = trailer_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 98dfe80b4538..cacce1e22f9c 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -186,20 +186,7 @@ out: return 0; } -static struct packet_type dsa_packet_type __read_mostly = { +struct packet_type dsa_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_DSA), .func = dsa_rcv, }; - -static int __init dsa_init_module(void) -{ - dev_add_pack(&dsa_packet_type); - return 0; -} -module_init(dsa_init_module); - -static void __exit dsa_cleanup_module(void) -{ - dev_remove_pack(&dsa_packet_type); -} -module_exit(dsa_cleanup_module); diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 6f383322ad25..e70c43c25e64 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -205,20 +205,7 @@ out: return 0; } -static struct packet_type edsa_packet_type __read_mostly = { +struct packet_type edsa_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_EDSA), .func = edsa_rcv, }; - -static int __init edsa_init_module(void) -{ - dev_add_pack(&edsa_packet_type); - return 0; -} -module_init(edsa_init_module); - -static void __exit edsa_cleanup_module(void) -{ - dev_remove_pack(&edsa_packet_type); -} -module_exit(edsa_cleanup_module); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index d6d7d0add3cb..94bc260d015d 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -114,20 +114,7 @@ out: return 0; } -static struct packet_type trailer_packet_type __read_mostly = { +struct packet_type trailer_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_TRAILER), .func = trailer_rcv, }; - -static int __init trailer_init_module(void) -{ - dev_add_pack(&trailer_packet_type); - return 0; -} -module_init(trailer_init_module); - -static void __exit trailer_cleanup_module(void) -{ - dev_remove_pack(&trailer_packet_type); -} -module_exit(trailer_cleanup_module); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 1c1f26c5d672..7e717cb35ad1 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -322,6 +322,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, /* Real hardware Econet. We're not worthy etc. */ #ifdef CONFIG_ECONET_NATIVE unsigned short proto = 0; + int hlen, tlen; int res; if (len + 15 > dev->mtu) { @@ -331,12 +332,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, dev_hold(dev); - skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, len + hlen + tlen, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); eb = (struct ec_cb *)&skb->cb; diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c new file mode 100644 index 000000000000..e4ecc1eef98c --- /dev/null +++ b/net/ieee802154/6lowpan.c @@ -0,0 +1,1239 @@ +/* + * Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* + * Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <linux/bitops.h> +#include <linux/if_arp.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netdevice.h> +#include <net/af_ieee802154.h> +#include <net/ieee802154.h> +#include <net/ieee802154_netdev.h> +#include <net/ipv6.h> + +#include "6lowpan.h" + +/* TTL uncompression values */ +static const u8 lowpan_ttl_values[] = {0, 1, 64, 255}; + +static LIST_HEAD(lowpan_devices); + +/* + * Uncompression of linklocal: + * 0 -> 16 bytes from packet + * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet + * 2 -> 2 bytes from prefix - zeroes + 2 from packet + * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr + * + * NOTE: => the uncompress function does change 0xf to 0x10 + * NOTE: 0x00 => no-autoconfig => unspecified + */ +static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20}; + +/* + * Uncompression of ctx-based: + * 0 -> 0 bits from packet [unspecified / reserved] + * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet + * 2 -> 8 bytes from prefix - zeroes + 2 from packet + * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr + */ +static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80}; + +/* + * Uncompression of ctx-base + * 0 -> 0 bits from packet + * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet + * 2 -> 2 bytes from prefix - zeroes + 3 from packet + * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr + */ +static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21}; + +/* Link local prefix */ +static const u8 lowpan_llprefix[] = {0xfe, 0x80}; + +/* private device info */ +struct lowpan_dev_info { + struct net_device *real_dev; /* real WPAN device ptr */ + struct mutex dev_list_mtx; /* mutex for list ops */ +}; + +struct lowpan_dev_record { + struct net_device *ldev; + struct list_head list; +}; + +struct lowpan_fragment { + struct sk_buff *skb; /* skb to be assembled */ + spinlock_t lock; /* concurency lock */ + u16 length; /* length to be assemled */ + u32 bytes_rcv; /* bytes received */ + u16 tag; /* current fragment tag */ + struct timer_list timer; /* assembling timer */ + struct list_head list; /* fragments list */ +}; + +static unsigned short fragment_tag; +static LIST_HEAD(lowpan_fragments); +spinlock_t flist_lock; + +static inline struct +lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) +{ + return netdev_priv(dev); +} + +static inline void lowpan_address_flip(u8 *src, u8 *dest) +{ + int i; + for (i = 0; i < IEEE802154_ADDR_LEN; i++) + (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i]; +} + +/* list of all 6lowpan devices, uses for package delivering */ +/* print data in line */ +static inline void lowpan_raw_dump_inline(const char *caller, char *msg, + unsigned char *buf, int len) +{ +#ifdef DEBUG + if (msg) + pr_debug("(%s) %s: ", caller, msg); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, + 16, 1, buf, len, false); +#endif /* DEBUG */ +} + +/* + * print data in a table format: + * + * addr: xx xx xx xx xx xx + * addr: xx xx xx xx xx xx + * ... + */ +static inline void lowpan_raw_dump_table(const char *caller, char *msg, + unsigned char *buf, int len) +{ +#ifdef DEBUG + if (msg) + pr_debug("(%s) %s:\n", caller, msg); + print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, + 16, 1, buf, len, false); +#endif /* DEBUG */ +} + +static u8 +lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr, + const unsigned char *lladdr) +{ + u8 val = 0; + + if (is_addr_mac_addr_based(ipaddr, lladdr)) + val = 3; /* 0-bits */ + else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { + /* compress IID to 16 bits xxxx::XXXX */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2); + *hc06_ptr += 2; + val = 2; /* 16-bits */ + } else { + /* do not compress IID => xxxx::IID */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8); + *hc06_ptr += 8; + val = 1; /* 64-bits */ + } + + return rol8(val, shift); +} + +static void +lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr) +{ + memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ALEN); + /* second bit-flip (Universe/Local) is done according RFC2464 */ + ipaddr->s6_addr[8] ^= 0x02; +} + +/* + * Uncompress addresses based on a prefix and a postfix with zeroes in + * between. If the postfix is zero in length it will use the link address + * to configure the IP address (autoconf style). + * pref_post_count takes a byte where the first nibble specify prefix count + * and the second postfix count (NOTE: 15/0xf => 16 bytes copy). + */ +static int +lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr, + u8 const *prefix, u8 pref_post_count, unsigned char *lladdr) +{ + u8 prefcount = pref_post_count >> 4; + u8 postcount = pref_post_count & 0x0f; + + /* full nibble 15 => 16 */ + prefcount = (prefcount == 15 ? 16 : prefcount); + postcount = (postcount == 15 ? 16 : postcount); + + if (lladdr) + lowpan_raw_dump_inline(__func__, "linklocal address", + lladdr, IEEE802154_ALEN); + if (prefcount > 0) + memcpy(ipaddr, prefix, prefcount); + + if (prefcount + postcount < 16) + memset(&ipaddr->s6_addr[prefcount], 0, + 16 - (prefcount + postcount)); + + if (postcount > 0) { + memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount); + skb_pull(skb, postcount); + } else if (prefcount > 0) { + if (lladdr == NULL) + return -EINVAL; + + /* no IID based configuration if no prefix and no data */ + lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr); + } + + pr_debug("(%s): uncompressing %d + %d => ", __func__, prefcount, + postcount); + lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16); + + return 0; +} + +static void +lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + + pr_debug("(%s): UDP header compression\n", __func__); + + if (((uh->source & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT) && + ((uh->dest & LOWPAN_NHC_UDP_4BIT_MASK) == + LOWPAN_NHC_UDP_4BIT_PORT)) { + pr_debug("(%s): both ports compression to 4 bits\n", __func__); + **hc06_ptr = LOWPAN_NHC_UDP_CS_P_11; + **(hc06_ptr + 1) = /* subtraction is faster */ + (u8)((uh->dest - LOWPAN_NHC_UDP_4BIT_PORT) + + ((uh->source & LOWPAN_NHC_UDP_4BIT_PORT) << 4)); + *hc06_ptr += 2; + } else if ((uh->dest & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("(%s): remove 8 bits of dest\n", __func__); + **hc06_ptr = LOWPAN_NHC_UDP_CS_P_01; + memcpy(*hc06_ptr + 1, &uh->source, 2); + **(hc06_ptr + 3) = (u8)(uh->dest - LOWPAN_NHC_UDP_8BIT_PORT); + *hc06_ptr += 4; + } else if ((uh->source & LOWPAN_NHC_UDP_8BIT_MASK) == + LOWPAN_NHC_UDP_8BIT_PORT) { + pr_debug("(%s): remove 8 bits of source\n", __func__); + **hc06_ptr = LOWPAN_NHC_UDP_CS_P_10; + memcpy(*hc06_ptr + 1, &uh->dest, 2); + **(hc06_ptr + 3) = (u8)(uh->source - LOWPAN_NHC_UDP_8BIT_PORT); + *hc06_ptr += 4; + } else { + pr_debug("(%s): can't compress header\n", __func__); + **hc06_ptr = LOWPAN_NHC_UDP_CS_P_00; + memcpy(*hc06_ptr + 1, &uh->source, 2); + memcpy(*hc06_ptr + 3, &uh->dest, 2); + *hc06_ptr += 5; + } + + /* checksum is always inline */ + memcpy(*hc06_ptr, &uh->check, 2); + *hc06_ptr += 2; +} + +static u8 lowpan_fetch_skb_u8(struct sk_buff *skb) +{ + u8 ret; + + ret = skb->data[0]; + skb_pull(skb, 1); + + return ret; +} + +static u16 lowpan_fetch_skb_u16(struct sk_buff *skb) +{ + u16 ret; + + BUG_ON(!pskb_may_pull(skb, 2)); + + ret = skb->data[0] | (skb->data[1] << 8); + skb_pull(skb, 2); + return ret; +} + +static int +lowpan_uncompress_udp_header(struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + u8 tmp; + + tmp = lowpan_fetch_skb_u8(skb); + + if ((tmp & LOWPAN_NHC_UDP_MASK) == LOWPAN_NHC_UDP_ID) { + pr_debug("(%s): UDP header uncompression\n", __func__); + switch (tmp & LOWPAN_NHC_UDP_CS_P_11) { + case LOWPAN_NHC_UDP_CS_P_00: + memcpy(&uh->source, &skb->data[0], 2); + memcpy(&uh->dest, &skb->data[2], 2); + skb_pull(skb, 4); + break; + case LOWPAN_NHC_UDP_CS_P_01: + memcpy(&uh->source, &skb->data[0], 2); + uh->dest = + skb->data[2] + LOWPAN_NHC_UDP_8BIT_PORT; + skb_pull(skb, 3); + break; + case LOWPAN_NHC_UDP_CS_P_10: + uh->source = skb->data[0] + LOWPAN_NHC_UDP_8BIT_PORT; + memcpy(&uh->dest, &skb->data[1], 2); + skb_pull(skb, 3); + break; + case LOWPAN_NHC_UDP_CS_P_11: + uh->source = + LOWPAN_NHC_UDP_4BIT_PORT + (skb->data[0] >> 4); + uh->dest = + LOWPAN_NHC_UDP_4BIT_PORT + (skb->data[0] & 0x0f); + skb_pull(skb, 1); + break; + default: + pr_debug("(%s) ERROR: unknown UDP format\n", __func__); + goto err; + break; + } + + pr_debug("(%s): uncompressed UDP ports: src = %d, dst = %d\n", + __func__, uh->source, uh->dest); + + /* copy checksum */ + memcpy(&uh->check, &skb->data[0], 2); + skb_pull(skb, 2); + } else { + pr_debug("(%s): ERROR: unsupported NH format\n", __func__); + goto err; + } + + return 0; +err: + return -EINVAL; +} + +static int lowpan_header_create(struct sk_buff *skb, + struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned len) +{ + u8 tmp, iphc0, iphc1, *hc06_ptr; + struct ipv6hdr *hdr; + const u8 *saddr = _saddr; + const u8 *daddr = _daddr; + u8 *head; + struct ieee802154_addr sa, da; + + if (type != ETH_P_IPV6) + return 0; + /* TODO: + * if this package isn't ipv6 one, where should it be routed? + */ + head = kzalloc(100, GFP_KERNEL); + if (head == NULL) + return -ENOMEM; + + hdr = ipv6_hdr(skb); + hc06_ptr = head + 2; + + pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n" + "\tnexthdr = 0x%02x\n\thop_lim = %d\n", __func__, + hdr->version, ntohs(hdr->payload_len), hdr->nexthdr, + hdr->hop_limit); + + lowpan_raw_dump_table(__func__, "raw skb network header dump", + skb_network_header(skb), sizeof(struct ipv6hdr)); + + if (!saddr) + saddr = dev->dev_addr; + + lowpan_raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + + /* + * As we copy some bit-length fields, in the IPHC encoding bytes, + * we sometimes use |= + * If the field is 0, and the current bit value in memory is 1, + * this does not work. We therefore reset the IPHC encoding here + */ + iphc0 = LOWPAN_DISPATCH_IPHC; + iphc1 = 0; + + /* TODO: context lookup */ + + lowpan_raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); + + /* + * Traffic class, flow label + * If flow label is 0, compress it. If traffic class is 0, compress it + * We have to process both in the same time as the offset of traffic + * class depends on the presence of version and flow label + */ + + /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */ + tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); + tmp = ((tmp & 0x03) << 6) | (tmp >> 2); + + if (((hdr->flow_lbl[0] & 0x0F) == 0) && + (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { + /* flow label can be compressed */ + iphc0 |= LOWPAN_IPHC_FL_C; + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress (elide) all */ + iphc0 |= LOWPAN_IPHC_TC_C; + } else { + /* compress only the flow label */ + *hc06_ptr = tmp; + hc06_ptr += 1; + } + } else { + /* Flow label cannot be compressed */ + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress only traffic class */ + iphc0 |= LOWPAN_IPHC_TC_C; + *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); + memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2); + hc06_ptr += 3; + } else { + /* compress nothing */ + memcpy(hc06_ptr, &hdr, 4); + /* replace the top byte with new ECN | DSCP format */ + *hc06_ptr = tmp; + hc06_ptr += 4; + } + } + + /* NOTE: payload length is always compressed */ + + /* Next Header is compress if UDP */ + if (hdr->nexthdr == UIP_PROTO_UDP) + iphc0 |= LOWPAN_IPHC_NH_C; + + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + *hc06_ptr = hdr->nexthdr; + hc06_ptr += 1; + } + + /* + * Hop limit + * if 1: compress, encoding is 01 + * if 64: compress, encoding is 10 + * if 255: compress, encoding is 11 + * else do not compress + */ + switch (hdr->hop_limit) { + case 1: + iphc0 |= LOWPAN_IPHC_TTL_1; + break; + case 64: + iphc0 |= LOWPAN_IPHC_TTL_64; + break; + case 255: + iphc0 |= LOWPAN_IPHC_TTL_255; + break; + default: + *hc06_ptr = hdr->hop_limit; + break; + } + + /* source address compression */ + if (is_addr_unspecified(&hdr->saddr)) { + pr_debug("(%s): source address is unspecified, setting SAC\n", + __func__); + iphc1 |= LOWPAN_IPHC_SAC; + /* TODO: context lookup */ + } else if (is_addr_link_local(&hdr->saddr)) { + pr_debug("(%s): source address is link-local\n", __func__); + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_SAM_BIT, &hdr->saddr, saddr); + } else { + pr_debug("(%s): send the full source address\n", __func__); + memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + + /* destination address compression */ + if (is_addr_mcast(&hdr->daddr)) { + pr_debug("(%s): destination address is multicast", __func__); + iphc1 |= LOWPAN_IPHC_M; + if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { + pr_debug("compressed to 1 octet\n"); + iphc1 |= LOWPAN_IPHC_DAM_11; + /* use last byte */ + *hc06_ptr = hdr->daddr.s6_addr[15]; + hc06_ptr += 1; + } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { + pr_debug("compressed to 4 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_10; + /* second byte + the last three */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3); + hc06_ptr += 4; + } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { + pr_debug("compressed to 6 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_01; + /* second byte + the last five */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5); + hc06_ptr += 6; + } else { + pr_debug("using full address\n"); + iphc1 |= LOWPAN_IPHC_DAM_00; + memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16); + hc06_ptr += 16; + } + } else { + pr_debug("(%s): destination address is unicast: ", __func__); + /* TODO: context lookup */ + if (is_addr_link_local(&hdr->daddr)) { + pr_debug("destination address is link-local\n"); + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_DAM_BIT, &hdr->daddr, daddr); + } else { + pr_debug("using full address\n"); + memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + } + + /* UDP header compression */ + if (hdr->nexthdr == UIP_PROTO_UDP) + lowpan_compress_udp_header(&hc06_ptr, skb); + + head[0] = iphc0; + head[1] = iphc1; + + skb_pull(skb, sizeof(struct ipv6hdr)); + memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); + + kfree(head); + + lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, + skb->len); + + /* + * NOTE1: I'm still unsure about the fact that compression and WPAN + * header are created here and not later in the xmit. So wait for + * an opinion of net maintainers. + */ + /* + * NOTE2: to be absolutely correct, we must derive PANid information + * from MAC subif of the 'dev' and 'real_dev' network devices, but + * this isn't implemented in mainline yet, so currently we assign 0xff + */ + { + /* prepare wpan address data */ + sa.addr_type = IEEE802154_ADDR_LONG; + sa.pan_id = 0xff; + + da.addr_type = IEEE802154_ADDR_LONG; + da.pan_id = 0xff; + + memcpy(&(da.hwaddr), daddr, 8); + memcpy(&(sa.hwaddr), saddr, 8); + + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + type, (void *)&da, (void *)&sa, skb->len); + } +} + +static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) +{ + struct sk_buff *new; + struct lowpan_dev_record *entry; + int stat = NET_RX_SUCCESS; + + new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), + GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return -ENOMEM; + + skb_push(new, sizeof(struct ipv6hdr)); + skb_reset_network_header(new); + skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr)); + + new->protocol = htons(ETH_P_IPV6); + new->pkt_type = PACKET_HOST; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == new->dev) { + skb = skb_copy(new, GFP_ATOMIC); + if (!skb) { + stat = -ENOMEM; + break; + } + + skb->dev = entry->ldev; + stat = netif_rx(skb); + } + rcu_read_unlock(); + + kfree_skb(new); + + return stat; +} + +static void lowpan_fragment_timer_expired(unsigned long entry_addr) +{ + struct lowpan_fragment *entry = (struct lowpan_fragment *)entry_addr; + + pr_debug("%s: timer expired for frame with tag %d\n", __func__, + entry->tag); + + spin_lock(&flist_lock); + list_del(&entry->list); + spin_unlock(&flist_lock); + + dev_kfree_skb(entry->skb); + kfree(entry); +} + +static int +lowpan_process_data(struct sk_buff *skb) +{ + struct ipv6hdr hdr; + u8 tmp, iphc0, iphc1, num_context = 0; + u8 *_saddr, *_daddr; + int err; + + lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, + skb->len); + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + goto drop; + iphc0 = lowpan_fetch_skb_u8(skb); + + /* fragments assembling */ + switch (iphc0 & LOWPAN_DISPATCH_MASK) { + case LOWPAN_DISPATCH_FRAG1: + case LOWPAN_DISPATCH_FRAGN: + { + struct lowpan_fragment *frame; + u8 len, offset; + u16 tag; + bool found = false; + + len = lowpan_fetch_skb_u8(skb); /* frame length */ + tag = lowpan_fetch_skb_u16(skb); + + /* + * check if frame assembling with the same tag is + * already in progress + */ + spin_lock(&flist_lock); + + list_for_each_entry(frame, &lowpan_fragments, list) + if (frame->tag == tag) { + found = true; + break; + } + + /* alloc new frame structure */ + if (!found) { + frame = kzalloc(sizeof(struct lowpan_fragment), + GFP_ATOMIC); + if (!frame) + goto unlock_and_drop; + + INIT_LIST_HEAD(&frame->list); + + frame->length = (iphc0 & 7) | (len << 3); + frame->tag = tag; + + /* allocate buffer for frame assembling */ + frame->skb = alloc_skb(frame->length + + sizeof(struct ipv6hdr), GFP_ATOMIC); + + if (!frame->skb) { + kfree(frame); + goto unlock_and_drop; + } + + frame->skb->priority = skb->priority; + frame->skb->dev = skb->dev; + + /* reserve headroom for uncompressed ipv6 header */ + skb_reserve(frame->skb, sizeof(struct ipv6hdr)); + skb_put(frame->skb, frame->length); + + init_timer(&frame->timer); + /* time out is the same as for ipv6 - 60 sec */ + frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT; + frame->timer.data = (unsigned long)frame; + frame->timer.function = lowpan_fragment_timer_expired; + + add_timer(&frame->timer); + + list_add_tail(&frame->list, &lowpan_fragments); + } + + if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) + goto unlock_and_drop; + + offset = lowpan_fetch_skb_u8(skb); /* fetch offset */ + + /* if payload fits buffer, copy it */ + if (likely((offset * 8 + skb->len) <= frame->length)) + skb_copy_to_linear_data_offset(frame->skb, offset * 8, + skb->data, skb->len); + else + goto unlock_and_drop; + + frame->bytes_rcv += skb->len; + + /* frame assembling complete */ + if ((frame->bytes_rcv == frame->length) && + frame->timer.expires > jiffies) { + /* if timer haven't expired - first of all delete it */ + del_timer(&frame->timer); + list_del(&frame->list); + spin_unlock(&flist_lock); + + dev_kfree_skb(skb); + skb = frame->skb; + kfree(frame); + iphc0 = lowpan_fetch_skb_u8(skb); + break; + } + spin_unlock(&flist_lock); + + return kfree_skb(skb), 0; + } + default: + break; + } + + iphc1 = lowpan_fetch_skb_u8(skb); + + _saddr = mac_cb(skb)->sa.hwaddr; + _daddr = mac_cb(skb)->da.hwaddr; + + pr_debug("(%s): iphc0 = %02x, iphc1 = %02x\n", __func__, iphc0, iphc1); + + /* another if the CID flag is set */ + if (iphc1 & LOWPAN_IPHC_CID) { + pr_debug("(%s): CID flag is set, increase header with one\n", + __func__); + if (!skb->len) + goto drop; + num_context = lowpan_fetch_skb_u8(skb); + } + + hdr.version = 6; + + /* Traffic Class and Flow Label */ + switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { + /* + * Traffic Class and FLow Label carried in-line + * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) + */ + case 0: /* 00b */ + if (!skb->len) + goto drop; + tmp = lowpan_fetch_skb_u8(skb); + memcpy(&hdr.flow_lbl, &skb->data[0], 3); + skb_pull(skb, 3); + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | + (hdr.flow_lbl[0] & 0x0f); + break; + /* + * Traffic class carried in-line + * ECN + DSCP (1 byte), Flow Label is elided + */ + case 1: /* 10b */ + if (!skb->len) + goto drop; + tmp = lowpan_fetch_skb_u8(skb); + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); + hdr.flow_lbl[1] = 0; + hdr.flow_lbl[2] = 0; + break; + /* + * Flow Label carried in-line + * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided + */ + case 2: /* 01b */ + if (!skb->len) + goto drop; + tmp = lowpan_fetch_skb_u8(skb); + hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30); + memcpy(&hdr.flow_lbl[1], &skb->data[0], 2); + skb_pull(skb, 2); + break; + /* Traffic Class and Flow Label are elided */ + case 3: /* 11b */ + hdr.priority = 0; + hdr.flow_lbl[0] = 0; + hdr.flow_lbl[1] = 0; + hdr.flow_lbl[2] = 0; + break; + default: + break; + } + + /* Next Header */ + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + /* Next header is carried inline */ + if (!skb->len) + goto drop; + hdr.nexthdr = lowpan_fetch_skb_u8(skb); + pr_debug("(%s): NH flag is set, next header is carried " + "inline: %02x\n", __func__, hdr.nexthdr); + } + + /* Hop Limit */ + if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) + hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; + else { + if (!skb->len) + goto drop; + hdr.hop_limit = lowpan_fetch_skb_u8(skb); + } + + /* Extract SAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; + + /* Source address uncompression */ + pr_debug("(%s): source address stateless compression\n", __func__); + err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix, + lowpan_unc_llconf[tmp], skb->data); + if (err) + goto drop; + + /* Extract DAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03; + + /* check for Multicast Compression */ + if (iphc1 & LOWPAN_IPHC_M) { + if (iphc1 & LOWPAN_IPHC_DAC) { + pr_debug("(%s): destination address context-based " + "multicast compression\n", __func__); + /* TODO: implement this */ + } else { + u8 prefix[] = {0xff, 0x02}; + + pr_debug("(%s): destination address non-context-based" + " multicast compression\n", __func__); + if (0 < tmp && tmp < 3) { + if (!skb->len) + goto drop; + else + prefix[1] = lowpan_fetch_skb_u8(skb); + } + + err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix, + lowpan_unc_mxconf[tmp], NULL); + if (err) + goto drop; + } + } else { + pr_debug("(%s): destination address stateless compression\n", + __func__); + err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix, + lowpan_unc_llconf[tmp], skb->data); + if (err) + goto drop; + } + + /* UDP data uncompression */ + if (iphc0 & LOWPAN_IPHC_NH_C) + if (lowpan_uncompress_udp_header(skb)) + goto drop; + + /* Not fragmented package */ + hdr.payload_len = htons(skb->len); + + pr_debug("(%s): skb headroom size = %d, data length = %d\n", __func__, + skb_headroom(skb), skb->len); + + pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n\t" + "nexthdr = 0x%02x\n\thop_lim = %d\n", __func__, hdr.version, + ntohs(hdr.payload_len), hdr.nexthdr, hdr.hop_limit); + + lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, + sizeof(hdr)); + return lowpan_skb_deliver(skb, &hdr); + +unlock_and_drop: + spin_unlock(&flist_lock); +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int lowpan_set_address(struct net_device *dev, void *p) +{ + struct sockaddr *sa = p; + + if (netif_running(dev)) + return -EBUSY; + + /* TODO: validate addr */ + memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + + return 0; +} + +static int lowpan_get_mac_header_length(struct sk_buff *skb) +{ + /* + * Currently long addressing mode is supported only, so the overall + * header size is 21: + * FC SeqNum DPAN DA SA Sec + * 2 + 1 + 2 + 8 + 8 + 0 = 21 + */ + return 21; +} + +static int +lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, + int mlen, int plen, int offset) +{ + struct sk_buff *frag; + int hlen, ret; + + /* if payload length is zero, therefore it's a first fragment */ + hlen = (plen == 0 ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE); + + lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); + + frag = dev_alloc_skb(hlen + mlen + plen + IEEE802154_MFR_SIZE); + if (!frag) + return -ENOMEM; + + frag->priority = skb->priority; + frag->dev = skb->dev; + + /* copy header, MFR and payload */ + memcpy(skb_put(frag, mlen), skb->data, mlen); + memcpy(skb_put(frag, hlen), head, hlen); + + if (plen) + skb_copy_from_linear_data_offset(skb, offset + mlen, + skb_put(frag, plen), plen); + + lowpan_raw_dump_table(__func__, " raw fragment dump", frag->data, + frag->len); + + ret = dev_queue_xmit(frag); + + return ret; +} + +static int +lowpan_skb_fragmentation(struct sk_buff *skb) +{ + int err, header_length, payload_length, tag, offset = 0; + u8 head[5]; + + header_length = lowpan_get_mac_header_length(skb); + payload_length = skb->len - header_length; + tag = fragment_tag++; + + /* first fragment header */ + head[0] = LOWPAN_DISPATCH_FRAG1 | (payload_length & 0x7); + head[1] = (payload_length >> 3) & 0xff; + head[2] = tag & 0xff; + head[3] = tag >> 8; + + err = lowpan_fragment_xmit(skb, head, header_length, 0, 0); + + /* next fragment header */ + head[0] &= ~LOWPAN_DISPATCH_FRAG1; + head[0] |= LOWPAN_DISPATCH_FRAGN; + + while ((payload_length - offset > 0) && (err >= 0)) { + int len = LOWPAN_FRAG_SIZE; + + head[4] = offset / 8; + + if (payload_length - offset < len) + len = payload_length - offset; + + err = lowpan_fragment_xmit(skb, head, header_length, + len, offset); + offset += len; + } + + return err; +} + +static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int err = -1; + + pr_debug("(%s): package xmit\n", __func__); + + skb->dev = lowpan_dev_info(dev)->real_dev; + if (skb->dev == NULL) { + pr_debug("(%s) ERROR: no real wpan device found\n", __func__); + goto error; + } + + if (skb->len <= IEEE802154_MTU) { + err = dev_queue_xmit(skb); + goto out; + } + + pr_debug("(%s): frame is too big, fragmentation is needed\n", + __func__); + err = lowpan_skb_fragmentation(skb); +error: + dev_kfree_skb(skb); +out: + if (err < 0) + pr_debug("(%s): ERROR: xmit failed\n", __func__); + + return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); +} + +static void lowpan_dev_free(struct net_device *dev) +{ + dev_put(lowpan_dev_info(dev)->real_dev); + free_netdev(dev); +} + +static struct header_ops lowpan_header_ops = { + .create = lowpan_header_create, +}; + +static const struct net_device_ops lowpan_netdev_ops = { + .ndo_start_xmit = lowpan_xmit, + .ndo_set_mac_address = lowpan_set_address, +}; + +static void lowpan_setup(struct net_device *dev) +{ + pr_debug("(%s)\n", __func__); + + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + dev->type = ARPHRD_IEEE802154; + /* Frame Control + Sequence Number + Address fields + Security Header */ + dev->hard_header_len = 2 + 1 + 20 + 14; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = 1281; + dev->tx_queue_len = 0; + dev->flags = IFF_BROADCAST | IFF_MULTICAST; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &lowpan_netdev_ops; + dev->header_ops = &lowpan_header_ops; + dev->destructor = lowpan_dev_free; +} + +static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + pr_debug("(%s)\n", __func__); + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) + return -EINVAL; + } + return 0; +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + if (!netif_running(dev)) + goto drop; + + if (dev->type != ARPHRD_IEEE802154) + goto drop; + + /* check that it's our buffer */ + switch (skb->data[0] & 0xe0) { + case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ + case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ + case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ + lowpan_process_data(skb); + break; + default: + break; + } + + return NET_RX_SUCCESS; + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int lowpan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *real_dev; + struct lowpan_dev_record *entry; + + pr_debug("(%s)\n", __func__); + + if (!tb[IFLA_LINK]) + return -EINVAL; + /* find and hold real wpan device */ + real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + + lowpan_dev_info(dev)->real_dev = real_dev; + mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); + + entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); + if (!entry) { + dev_put(real_dev); + lowpan_dev_info(dev)->real_dev = NULL; + return -ENOMEM; + } + + entry->ldev = dev; + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, &lowpan_devices); + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + register_netdevice(dev); + + return 0; +} + +static void lowpan_dellink(struct net_device *dev, struct list_head *head) +{ + struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); + struct net_device *real_dev = lowpan_dev->real_dev; + struct lowpan_dev_record *entry; + struct lowpan_dev_record *tmp; + + ASSERT_RTNL(); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (entry->ldev == dev) { + list_del(&entry->list); + kfree(entry); + } + } + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); + + unregister_netdevice_queue(dev, head); + + dev_put(real_dev); +} + +static struct rtnl_link_ops lowpan_link_ops __read_mostly = { + .kind = "lowpan", + .priv_size = sizeof(struct lowpan_dev_info), + .setup = lowpan_setup, + .newlink = lowpan_newlink, + .dellink = lowpan_dellink, + .validate = lowpan_validate, +}; + +static inline int __init lowpan_netlink_init(void) +{ + return rtnl_link_register(&lowpan_link_ops); +} + +static inline void __init lowpan_netlink_fini(void) +{ + rtnl_link_unregister(&lowpan_link_ops); +} + +static struct packet_type lowpan_packet_type = { + .type = __constant_htons(ETH_P_IEEE802154), + .func = lowpan_rcv, +}; + +static int __init lowpan_init_module(void) +{ + int err = 0; + + pr_debug("(%s)\n", __func__); + + err = lowpan_netlink_init(); + if (err < 0) + goto out; + + dev_add_pack(&lowpan_packet_type); +out: + return err; +} + +static void __exit lowpan_cleanup_module(void) +{ + pr_debug("(%s)\n", __func__); + + lowpan_netlink_fini(); + + dev_remove_pack(&lowpan_packet_type); +} + +module_init(lowpan_init_module); +module_exit(lowpan_cleanup_module); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h new file mode 100644 index 000000000000..aeff3f310482 --- /dev/null +++ b/net/ieee802154/6lowpan.h @@ -0,0 +1,235 @@ +/* + * Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* + * Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __6LOWPAN_H__ +#define __6LOWPAN_H__ + +/* need to know address length to manipulate with it */ +#define IEEE802154_ALEN 8 + +#define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */ +#define UIP_IPH_LEN 40 /* ipv6 fixed header size */ +#define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */ +#define UIP_FRAGH_LEN 8 /* ipv6 fragment header size */ + +/* + * ipv6 address based on mac + * second bit-flip (Universe/Local) is done according RFC2464 + */ +#define is_addr_mac_addr_based(a, m) \ + ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \ + (((a)->s6_addr[9]) == (m)[1]) && \ + (((a)->s6_addr[10]) == (m)[2]) && \ + (((a)->s6_addr[11]) == (m)[3]) && \ + (((a)->s6_addr[12]) == (m)[4]) && \ + (((a)->s6_addr[13]) == (m)[5]) && \ + (((a)->s6_addr[14]) == (m)[6]) && \ + (((a)->s6_addr[15]) == (m)[7])) + +/* ipv6 address is unspecified */ +#define is_addr_unspecified(a) \ + ((((a)->s6_addr32[0]) == 0) && \ + (((a)->s6_addr32[1]) == 0) && \ + (((a)->s6_addr32[2]) == 0) && \ + (((a)->s6_addr32[3]) == 0)) + +/* compare ipv6 addresses prefixes */ +#define ipaddr_prefixcmp(addr1, addr2, length) \ + (memcmp(addr1, addr2, length >> 3) == 0) + +/* local link, i.e. FE80::/10 */ +#define is_addr_link_local(a) (((a)->s6_addr16[0]) == 0x80FE) + +/* + * check whether we can compress the IID to 16 bits, + * it's possible for unicast adresses with first 49 bits are zero only. + */ +#define lowpan_is_iid_16_bit_compressable(a) \ + ((((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + ((((a)->s6_addr[14]) & 0x80) == 0)) + +/* multicast address */ +#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) + +/* check whether the 112-bit gid of the multicast address is mappable to: */ + +/* 9 bits, for FF02::1 (all nodes) and FF02::2 (all routers) addresses only. */ +#define lowpan_is_mcast_addr_compressable(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + (((a)->s6_addr[14]) == 0) && \ + ((((a)->s6_addr[15]) == 1) || (((a)->s6_addr[15]) == 2))) + +/* 48 bits, FFXX::00XX:XXXX:XXXX */ +#define lowpan_is_mcast_addr_compressable48(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr[10]) == 0)) + +/* 32 bits, FFXX::00XX:XXXX */ +#define lowpan_is_mcast_addr_compressable32(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr[12]) == 0)) + +/* 8 bits, FF02::00XX */ +#define lowpan_is_mcast_addr_compressable8(a) \ + ((((a)->s6_addr[1]) == 2) && \ + (((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + (((a)->s6_addr[14]) == 0)) + +#define lowpan_is_addr_broadcast(a) \ + ((((a)[0]) == 0xFF) && \ + (((a)[1]) == 0xFF) && \ + (((a)[2]) == 0xFF) && \ + (((a)[3]) == 0xFF) && \ + (((a)[4]) == 0xFF) && \ + (((a)[5]) == 0xFF) && \ + (((a)[6]) == 0xFF) && \ + (((a)[7]) == 0xFF)) + +#define LOWPAN_DISPATCH_IPV6 0x41 /* 01000001 = 65 */ +#define LOWPAN_DISPATCH_HC1 0x42 /* 01000010 = 66 */ +#define LOWPAN_DISPATCH_IPHC 0x60 /* 011xxxxx = ... */ +#define LOWPAN_DISPATCH_FRAG1 0xc0 /* 11000xxx */ +#define LOWPAN_DISPATCH_FRAGN 0xe0 /* 11100xxx */ + +#define LOWPAN_DISPATCH_MASK 0xf8 /* 11111000 */ + +#define LOWPAN_FRAG_TIMEOUT (HZ * 60) /* time-out 60 sec */ + +#define LOWPAN_FRAG1_HEAD_SIZE 0x4 +#define LOWPAN_FRAGN_HEAD_SIZE 0x5 + +/* + * According IEEE802.15.4 standard: + * - MTU is 127 octets + * - maximum MHR size is 37 octets + * - MFR size is 2 octets + * + * so minimal payload size that we may guarantee is: + * MTU - MHR - MFR = 88 octets + */ +#define LOWPAN_FRAG_SIZE 88 + +/* + * Values of fields within the IPHC encoding first byte + * (C stands for compressed and I for inline) + */ +#define LOWPAN_IPHC_TF 0x18 + +#define LOWPAN_IPHC_FL_C 0x10 +#define LOWPAN_IPHC_TC_C 0x08 +#define LOWPAN_IPHC_NH_C 0x04 +#define LOWPAN_IPHC_TTL_1 0x01 +#define LOWPAN_IPHC_TTL_64 0x02 +#define LOWPAN_IPHC_TTL_255 0x03 +#define LOWPAN_IPHC_TTL_I 0x00 + + +/* Values of fields within the IPHC encoding second byte */ +#define LOWPAN_IPHC_CID 0x80 + +#define LOWPAN_IPHC_SAC 0x40 +#define LOWPAN_IPHC_SAM_00 0x00 +#define LOWPAN_IPHC_SAM_01 0x10 +#define LOWPAN_IPHC_SAM_10 0x20 +#define LOWPAN_IPHC_SAM 0x30 + +#define LOWPAN_IPHC_SAM_BIT 4 + +#define LOWPAN_IPHC_M 0x08 +#define LOWPAN_IPHC_DAC 0x04 +#define LOWPAN_IPHC_DAM_00 0x00 +#define LOWPAN_IPHC_DAM_01 0x01 +#define LOWPAN_IPHC_DAM_10 0x02 +#define LOWPAN_IPHC_DAM_11 0x03 + +#define LOWPAN_IPHC_DAM_BIT 0 +/* + * LOWPAN_UDP encoding (works together with IPHC) + */ +#define LOWPAN_NHC_UDP_MASK 0xF8 +#define LOWPAN_NHC_UDP_ID 0xF0 +#define LOWPAN_NHC_UDP_CHECKSUMC 0x04 +#define LOWPAN_NHC_UDP_CHECKSUMI 0x00 + +#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0 +#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0 +#define LOWPAN_NHC_UDP_8BIT_PORT 0xF000 +#define LOWPAN_NHC_UDP_8BIT_MASK 0xFF00 + +/* values for port compression, _with checksum_ ie bit 5 set to 0 */ +#define LOWPAN_NHC_UDP_CS_P_00 0xF0 /* all inline */ +#define LOWPAN_NHC_UDP_CS_P_01 0xF1 /* source 16bit inline, + dest = 0xF0 + 8 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline, + dest = 16 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ + +#endif /* __6LOWPAN_H__ */ diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 1c1de97d264a..7dee65052925 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -10,3 +10,9 @@ config IEEE802154 Say Y here to compile LR-WPAN support into the kernel or say M to compile it as modules. + +config IEEE802154_6LOWPAN + tristate "6lowpan support over IEEE 802.15.4" + depends on IEEE802154 && IPV6 + ---help--- + IPv6 compression over IEEE 802.15.4. diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 5761185f884e..d7716d64c6bb 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,3 +1,5 @@ -obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o -af_802154-y := af_ieee802154.o raw.o dgram.o +obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o +obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o + +ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o +af_802154-y := af_ieee802154.o raw.o dgram.o diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index faecf648123f..1b09eaabaac1 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -209,6 +209,7 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, unsigned mtu; struct sk_buff *skb; struct dgram_sock *ro = dgram_sk(sk); + int hlen, tlen; int err; if (msg->msg_flags & MSG_OOB) { @@ -229,13 +230,15 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, mtu = dev->mtu; pr_debug("name = %s, mtu = %u\n", dev->name, mtu); - skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + size, + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto out_dev; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 71ee1108d4f8..adaf46214905 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -30,6 +30,7 @@ #include <net/genetlink.h> #include <net/sock.h> #include <linux/nl802154.h> +#include <linux/export.h> #include <net/af_ieee802154.h> #include <net/nl802154.h> #include <net/ieee802154.h> diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 10970ca85748..f96bae8fd330 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -108,6 +108,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct net_device *dev; unsigned mtu; struct sk_buff *skb; + int hlen, tlen; int err; if (msg->msg_flags & MSG_OOB) { @@ -137,12 +138,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out_dev; } - skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + size, + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = sock_alloc_send_skb(sk, hlen + tlen + size, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto out_dev; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_mac_header(skb); skb_reset_network_header(skb); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index cbb505ba9324..aa2a2c79776f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -163,8 +163,6 @@ config IP_PNP_RARP operating on your network. Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details. -# not yet ready.. -# bool ' IP: ARP support' CONFIG_IP_PNP_ARP config NET_IPIP tristate "IP: tunneling" select INET_TUNNEL @@ -409,6 +407,14 @@ config INET_TCP_DIAG depends on INET_DIAG def_tristate INET_DIAG +config INET_UDP_DIAG + tristate "UDP: socket monitoring interface" + depends on INET_DIAG + default n + ---help--- + Support for UDP socket monitoring interface used by the ss tool. + If unsure, say Y. + menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f2dc69cffb57..ff75d3bbcd6a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o +obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o @@ -47,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o +obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index dd2b9478ddd1..f7b5670744f0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -893,7 +893,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL(inet_ioctl); #ifdef CONFIG_COMPAT -int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; int err = -ENOIOCTLCMD; @@ -1250,7 +1250,8 @@ out: return err; } -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct iphdr *iph; @@ -1572,9 +1573,9 @@ static __net_init int ipv4_mib_init_net(struct net *net) sizeof(struct icmp_mib), __alignof__(struct icmp_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, - sizeof(struct icmpmsg_mib), - __alignof__(struct icmpmsg_mib)) < 0) + net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), + GFP_KERNEL); + if (!net->mib.icmpmsg_statistics) goto err_icmpmsg_mib; tcp_mib_init(net); @@ -1598,7 +1599,7 @@ err_tcp_mib: static __net_exit void ipv4_mib_exit_net(struct net *net) { - snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics); + kfree(net->mib.icmpmsg_statistics); snmp_mib_free((void __percpu **)net->mib.icmp_statistics); snmp_mib_free((void __percpu **)net->mib.udplite_statistics); snmp_mib_free((void __percpu **)net->mib.udp_statistics); @@ -1671,6 +1672,8 @@ static int __init inet_init(void) ip_static_sysctl_init(); #endif + tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; + /* * Add all the base protocols. */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index c1f4154552fc..36d14406261e 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -136,8 +136,6 @@ static void ah_output_done(struct crypto_async_request *base, int err) memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); } - err = ah->nexthdr; - kfree(AH_SKB_CB(skb)->tmp); xfrm_output_resume(skb, err); } @@ -264,12 +262,12 @@ static void ah_input_done(struct crypto_async_request *base, int err) if (err) goto out; + err = ah->nexthdr; + skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); skb_set_transport_header(skb, -ihl); - - err = ah->nexthdr; out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -371,8 +369,6 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) if (err == -EINPROGRESS) goto out; - if (err == -EBUSY) - err = NET_XMIT_DROP; goto out_free; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 96a164aa1367..59402be133f0 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -112,11 +112,6 @@ #include <net/arp.h> #include <net/ax25.h> #include <net/netrom.h> -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) -#include <net/atmclip.h> -struct neigh_table *clip_tbl_hook; -EXPORT_SYMBOL(clip_tbl_hook); -#endif #include <asm/system.h> #include <linux/uaccess.h> @@ -126,7 +121,7 @@ EXPORT_SYMBOL(clip_tbl_hook); /* * Interface to generic neighbour cache. */ -static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 rnd); +static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); static int arp_constructor(struct neighbour *neigh); static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -164,7 +159,6 @@ static const struct neigh_ops arp_broken_ops = { struct neigh_table arp_tbl = { .family = AF_INET, - .entry_size = sizeof(struct neighbour) + 4, .key_len = 4, .hash = arp_hash, .constructor = arp_constructor, @@ -177,7 +171,7 @@ struct neigh_table arp_tbl = { .gc_staletime = 60 * HZ, .reachable_time = 30 * HZ, .delay_probe_time = 5 * HZ, - .queue_len = 3, + .queue_len_bytes = 64*1024, .ucast_probes = 3, .mcast_probes = 3, .anycast_delay = 1 * HZ, @@ -221,9 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) static u32 arp_hash(const void *pkey, const struct net_device *dev, - __u32 hash_rnd) + __u32 *hash_rnd) { - return arp_hashfn(*(u32 *)pkey, dev, hash_rnd); + return arp_hashfn(*(u32 *)pkey, dev, *hash_rnd); } static int arp_constructor(struct neighbour *neigh) @@ -283,9 +277,9 @@ static int arp_constructor(struct neighbour *neigh) default: break; case ARPHRD_ROSE: -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) case ARPHRD_AX25: -#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) +#if IS_ENABLED(CONFIG_NETROM) case ARPHRD_NETROM: #endif neigh->ops = &arp_broken_ops; @@ -592,16 +586,18 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct sk_buff *skb; struct arphdr *arp; unsigned char *arp_ptr; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; /* * Allocate a buffer */ - skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC); if (skb == NULL) return NULL; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; @@ -633,13 +629,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp->ar_pro = htons(ETH_P_IP); break; -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) case ARPHRD_AX25: arp->ar_hrd = htons(ARPHRD_AX25); arp->ar_pro = htons(AX25_P_IP); break; -#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) +#if IS_ENABLED(CONFIG_NETROM) case ARPHRD_NETROM: arp->ar_hrd = htons(ARPHRD_NETROM); arp->ar_pro = htons(AX25_P_IP); @@ -647,13 +643,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, #endif #endif -#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) +#if IS_ENABLED(CONFIG_FDDI) case ARPHRD_FDDI: arp->ar_hrd = htons(ARPHRD_ETHER); arp->ar_pro = htons(ETH_P_IP); break; #endif -#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) +#if IS_ENABLED(CONFIG_TR) case ARPHRD_IEEE802_TR: arp->ar_hrd = htons(ARPHRD_IEEE802); arp->ar_pro = htons(ETH_P_IP); @@ -1040,7 +1036,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, return -EINVAL; } switch (dev->type) { -#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) +#if IS_ENABLED(CONFIG_FDDI) case ARPHRD_FDDI: /* * According to RFC 1390, FDDI devices should accept ARP @@ -1286,7 +1282,7 @@ void __init arp_init(void) } #ifdef CONFIG_PROC_FS -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) /* ------------------------------------------------------------------------ */ /* @@ -1334,7 +1330,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, read_lock(&n->lock); /* Convert hardware address to XX:XX:XX:XX ... form. */ -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM) ax2asc2((ax25_address *)n->ha, hbuffer); else { @@ -1347,7 +1343,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, if (k != 0) --k; hbuffer[k] = 0; -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) } #endif sprintf(tbuf, "%pI4", n->primary_key); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2c2a98e402e7..86f3b885b4f3 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -476,7 +476,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def, doi = doi_def->doi; doi_type = doi_def->type; - if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN) + if (doi_def->doi == CIPSO_V4_DOI_UNKNOWN) goto doi_add_return; for (iter = 0; iter < CIPSO_V4_TAG_MAXCNT; iter++) { switch (doi_def->tags[iter]) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bc19bd06dd00..e41c40f48cfe 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -291,7 +291,7 @@ static void inetdev_destroy(struct in_device *in_dev) inet_free_ifa(ifa); } - rcu_assign_pointer(dev->ip_ptr, NULL); + RCU_INIT_POINTER(dev->ip_ptr, NULL); devinet_sysctl_unregister(in_dev); neigh_parms_release(&arp_tbl, in_dev->arp_parms); @@ -1175,7 +1175,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_REGISTER: printk(KERN_DEBUG "inetdev_event: bug\n"); - rcu_assign_pointer(dev->ip_ptr, NULL); + RCU_INIT_POINTER(dev->ip_ptr, NULL); break; case NETDEV_UP: if (!inetdev_valid_mtu(dev->mtu)) @@ -1490,7 +1490,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + int old_value = *(int *)ctl->data; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + int new_value = *(int *)ctl->data; if (write) { struct ipv4_devconf *cnf = ctl->extra1; @@ -1501,6 +1503,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (cnf == net->ipv4.devconf_dflt) devinet_copy_dflt_conf(net, i); + if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) + if ((new_value == 0) && (old_value != 0)) + rt_cache_flush(net, 0); } return ret; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index a53bb1b5b118..799fc790b3cf 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -26,6 +26,7 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/rcupdate.h> +#include <linux/export.h> #include <net/ip.h> #include <net/route.h> #include <net/tcp.h> @@ -66,6 +67,7 @@ int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) return err; } +EXPORT_SYMBOL_GPL(fib_lookup); static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index de9e2978476f..2b555a5521e0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -73,6 +73,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/prefetch.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/protocol.h> @@ -1606,6 +1607,7 @@ found: rcu_read_unlock(); return ret; } +EXPORT_SYMBOL_GPL(fib_table_lookup); /* * Remove the leaf and return parent. @@ -1621,7 +1623,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) put_child(t, (struct tnode *)tp, cindex, NULL); trie_rebalance(t, tp); } else - rcu_assign_pointer(t->trie, NULL); + RCU_INIT_POINTER(t->trie, NULL); free_leaf(l); } diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index dbfc21de3479..8cb1ebb7cd74 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -34,7 +34,7 @@ int gre_add_protocol(const struct gre_protocol *proto, u8 version) if (gre_proto[version]) goto err_out_unlock; - rcu_assign_pointer(gre_proto[version], proto); + RCU_INIT_POINTER(gre_proto[version], proto); spin_unlock(&gre_proto_lock); return 0; @@ -54,7 +54,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) if (rcu_dereference_protected(gre_proto[version], lockdep_is_held(&gre_proto_lock)) != proto) goto err_out_unlock; - rcu_assign_pointer(gre_proto[version], NULL); + RCU_INIT_POINTER(gre_proto[version], NULL); spin_unlock(&gre_proto_lock); synchronize_rcu(); return 0; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 23ef31baa1af..ab188ae12fd9 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.icmp_sk[i] = sk; /* Enough space for 2 64K ICMP packets, including - * sk_buff struct overhead. + * sk_buff/skb_shared_info struct overhead. */ - sk->sk_sndbuf = - (2 * ((64 * 1024) + sizeof(struct sk_buff))); + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); /* * Speedup sock_wfree() diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d577199eabd5..450e5d21ed2a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -304,9 +304,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct igmpv3_report *pig; struct net *net = dev_net(dev); struct flowi4 fl4; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; while (1) { - skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), + skb = alloc_skb(size + hlen + tlen, GFP_ATOMIC | __GFP_NOWARN); if (skb) break; @@ -327,7 +329,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) skb_dst_set(skb, &rt->dst); skb->dev = dev; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); pip = ip_hdr(skb); @@ -647,6 +649,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, __be32 group = pmc ? pmc->multiaddr : 0; struct flowi4 fl4; __be32 dst; + int hlen, tlen; if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); @@ -661,7 +664,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (IS_ERR(rt)) return -1; - skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; @@ -669,7 +674,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, skb_dst_set(skb, &rt->dst); - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); iph = ip_hdr(skb); @@ -875,6 +880,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * to be intended in a v3 query. */ max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); + if (!max_delay) + max_delay = 1; /* can't mod w/ 0 */ } else { /* v3 */ if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) return; @@ -1009,7 +1016,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG. We will get multicast token leakage, when IFF_MULTICAST - is changed. This check should be done in dev->set_multicast_list + is changed. This check should be done in ndo_set_rx_mode routine. Something sort of: if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; } --ANK @@ -1574,7 +1581,7 @@ out_unlock: * Add multicast single-source filter to the interface list */ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, - __be32 *psfsrc, int delta) + __be32 *psfsrc) { struct ip_sf_list *psf, *psf_prev; @@ -1709,14 +1716,15 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[sfmode]++; err = 0; for (i=0; i<sfcount; i++) { - err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i], delta); + err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; } if (err) { int j; - pmc->sfcount[sfmode]--; + if (!delta) + pmc->sfcount[sfmode]--; for (j=0; j<i; j++) (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { @@ -1835,7 +1843,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, } err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, psf->sl_count, psf->sl_addr, 0); - rcu_assign_pointer(iml->sflist, NULL); + RCU_INIT_POINTER(iml->sflist, NULL); /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); kfree_rcu(psf, rcu); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c14d88ad348d..2e4e24476c4c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -418,7 +418,7 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else #define AF_INET_FAMILY(fam) 1 @@ -588,10 +588,19 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, } EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); -struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, - const gfp_t priority) +/** + * inet_csk_clone_lock - clone an inet socket, and lock its clone + * @sk: the socket to clone + * @req: request_sock + * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) + * + * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) + */ +struct sock *inet_csk_clone_lock(const struct sock *sk, + const struct request_sock *req, + const gfp_t priority) { - struct sock *newsk = sk_clone(sk, priority); + struct sock *newsk = sk_clone_lock(sk, priority); if (newsk != NULL) { struct inet_connection_sock *newicsk = inet_csk(newsk); @@ -615,7 +624,7 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, } return newsk; } -EXPORT_SYMBOL_GPL(inet_csk_clone); +EXPORT_SYMBOL_GPL(inet_csk_clone_lock); /* * At this point, there should be no process reference to this diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 389a2e6a17fd..fcf281819cd4 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -33,6 +33,7 @@ #include <linux/stddef.h> #include <linux/inet_diag.h> +#include <linux/sock_diag.h> static const struct inet_diag_handler **inet_diag_table; @@ -45,24 +46,22 @@ struct inet_diag_entry { u16 userlocks; }; -static struct sock *idiagnl; - #define INET_DIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) static DEFINE_MUTEX(inet_diag_table_mutex); -static const struct inet_diag_handler *inet_diag_lock_handler(int type) +static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { - if (!inet_diag_table[type]) - request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_INET_DIAG, type); + if (!inet_diag_table[proto]) + request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET, proto); mutex_lock(&inet_diag_table_mutex); - if (!inet_diag_table[type]) + if (!inet_diag_table[proto]) return ERR_PTR(-ENOENT); - return inet_diag_table[type]; + return inet_diag_table[proto]; } static inline void inet_diag_unlock_handler( @@ -71,21 +70,21 @@ static inline void inet_diag_unlock_handler( mutex_unlock(&inet_diag_table_mutex); } -static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, - int ext, u32 pid, u32 seq, u16 nlmsg_flags, +int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, + struct sk_buff *skb, struct inet_diag_req_v2 *req, + u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; void *info = NULL; struct inet_diag_meminfo *minfo = NULL; unsigned char *b = skb_tail_pointer(skb); const struct inet_diag_handler *handler; + int ext = req->idiag_ext; - handler = inet_diag_table[unlh->nlmsg_type]; + handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(handler == NULL); nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); @@ -97,42 +96,55 @@ static int inet_csk_diag_fill(struct sock *sk, if (ext & (1 << (INET_DIAG_MEMINFO - 1))) minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo)); - if (ext & (1 << (INET_DIAG_INFO - 1))) - info = INET_DIAG_PUT(skb, INET_DIAG_INFO, - handler->idiag_info_size); - - if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { - const size_t len = strlen(icsk->icsk_ca_ops->name); - - strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), - icsk->icsk_ca_ops->name); - } - r->idiag_family = sk->sk_family; r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; r->id.idiag_if = sk->sk_bound_dev_if; - r->id.idiag_cookie[0] = (u32)(unsigned long)sk; - r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); + sock_diag_save_cookie(sk, r->id.idiag_cookie); r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = inet->inet_dport; r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, + * hence this needs to be included regardless of socket family. + */ + if (ext & (1 << (INET_DIAG_TOS - 1))) + RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos); + +#if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &np->rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &np->daddr); + *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = np->daddr; + if (ext & (1 << (INET_DIAG_TCLASS - 1))) + RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass); } #endif + r->idiag_uid = sock_i_uid(sk); + r->idiag_inode = sock_i_ino(sk); + + if (minfo) { + minfo->idiag_rmem = sk_rmem_alloc_get(sk); + minfo->idiag_wmem = sk->sk_wmem_queued; + minfo->idiag_fmem = sk->sk_forward_alloc; + minfo->idiag_tmem = sk_wmem_alloc_get(sk); + } + + if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) + if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) + goto rtattr_failure; + + if (icsk == NULL) { + r->idiag_rqueue = r->idiag_wqueue = 0; + goto out; + } + #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) if (icsk->icsk_pending == ICSK_TIME_RETRANS) { @@ -153,14 +165,14 @@ static int inet_csk_diag_fill(struct sock *sk, } #undef EXPIRES_IN_MS - r->idiag_uid = sock_i_uid(sk); - r->idiag_inode = sock_i_ino(sk); + if (ext & (1 << (INET_DIAG_INFO - 1))) + info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info)); - if (minfo) { - minfo->idiag_rmem = sk_rmem_alloc_get(sk); - minfo->idiag_wmem = sk->sk_wmem_queued; - minfo->idiag_fmem = sk->sk_forward_alloc; - minfo->idiag_tmem = sk_wmem_alloc_get(sk); + if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { + const size_t len = strlen(icsk->icsk_ca_ops->name); + + strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), + icsk->icsk_ca_ops->name); } handler->idiag_get_info(sk, r, info); @@ -169,6 +181,7 @@ static int inet_csk_diag_fill(struct sock *sk, icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) icsk->icsk_ca_ops->get_info(sk, ext, skb); +out: nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; @@ -177,10 +190,20 @@ nlmsg_failure: nlmsg_trim(skb, b); return -EMSGSIZE; } +EXPORT_SYMBOL_GPL(inet_sk_diag_fill); + +static int inet_csk_diag_fill(struct sock *sk, + struct sk_buff *skb, struct inet_diag_req_v2 *req, + u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + return inet_sk_diag_fill(sk, inet_csk(sk), + skb, req, pid, seq, nlmsg_flags, unlh); +} static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, - struct sk_buff *skb, int ext, u32 pid, - u32 seq, u16 nlmsg_flags, + struct sk_buff *skb, struct inet_diag_req_v2 *req, + u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { long tmo; @@ -201,8 +224,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_family = tw->tw_family; r->idiag_retrans = 0; r->id.idiag_if = tw->tw_bound_dev_if; - r->id.idiag_cookie[0] = (u32)(unsigned long)tw; - r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1); + sock_diag_save_cookie(tw, r->id.idiag_cookie); r->id.idiag_sport = tw->tw_sport; r->id.idiag_dport = tw->tw_dport; r->id.idiag_src[0] = tw->tw_rcv_saddr; @@ -214,15 +236,13 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_wqueue = 0; r->idiag_uid = 0; r->idiag_inode = 0; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tw6->tw_v6_rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tw6->tw_v6_daddr); + *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; } #endif nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail; @@ -233,42 +253,31 @@ nlmsg_failure: } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - int ext, u32 pid, u32 seq, u16 nlmsg_flags, + struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, ext, pid, seq, nlmsg_flags, + skb, r, pid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh); } -static int inet_diag_get_exact(struct sk_buff *in_skb, - const struct nlmsghdr *nlh) +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, + const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) { int err; struct sock *sk; - struct inet_diag_req *req = NLMSG_DATA(nlh); struct sk_buff *rep; - struct inet_hashinfo *hashinfo; - const struct inet_diag_handler *handler; - handler = inet_diag_lock_handler(nlh->nlmsg_type); - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - goto unlock; - } - - hashinfo = handler->idiag_hashinfo; err = -EINVAL; - - if (req->idiag_family == AF_INET) { + if (req->sdiag_family == AF_INET) { sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); } -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - else if (req->idiag_family == AF_INET6) { +#if IS_ENABLED(CONFIG_IPV6) + else if (req->sdiag_family == AF_INET6) { sk = inet6_lookup(&init_net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, @@ -278,29 +287,26 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, } #endif else { - goto unlock; + goto out_nosk; } err = -ENOENT; if (sk == NULL) - goto unlock; + goto out_nosk; - err = -ESTALE; - if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || - req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) && - ((u32)(unsigned long)sk != req->id.idiag_cookie[0] || - (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1])) + err = sock_diag_check_cookie(sk, req->id.idiag_cookie); + if (err) goto out; err = -ENOMEM; rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + sizeof(struct inet_diag_meminfo) + - handler->idiag_info_size + 64)), + sizeof(struct tcp_info) + 64)), GFP_KERNEL); if (!rep) goto out; - err = sk_diag_fill(sk, rep, req->idiag_ext, + err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { @@ -308,7 +314,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, kfree_skb(rep); goto out; } - err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -320,8 +326,25 @@ out: else sock_put(sk); } -unlock: +out_nosk: + return err; +} +EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); + +static int inet_diag_get_exact(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + struct inet_diag_req_v2 *req) +{ + const struct inet_diag_handler *handler; + int err; + + handler = inet_diag_lock_handler(req->sdiag_protocol); + if (IS_ERR(handler)) + err = PTR_ERR(handler); + else + err = handler->dump_one(in_skb, nlh, req); inet_diag_unlock_handler(handler); + return err; } @@ -352,9 +375,12 @@ static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) } -static int inet_diag_bc_run(const void *bc, int len, - const struct inet_diag_entry *entry) +static int inet_diag_bc_run(const struct nlattr *_bc, + const struct inet_diag_entry *entry) { + const void *bc = nla_data(_bc); + int len = nla_len(_bc); + while (len > 0) { int yes = 1; const struct inet_diag_bc_op *op = bc; @@ -428,6 +454,35 @@ static int inet_diag_bc_run(const void *bc, int len, return len == 0; } +int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) +{ + struct inet_diag_entry entry; + struct inet_sock *inet = inet_sk(sk); + + if (bc == NULL) + return 1; + + entry.family = sk->sk_family; +#if IS_ENABLED(CONFIG_IPV6) + if (entry.family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + entry.saddr = np->rcv_saddr.s6_addr32; + entry.daddr = np->daddr.s6_addr32; + } else +#endif + { + entry.saddr = &inet->inet_rcv_saddr; + entry.daddr = &inet->inet_daddr; + } + entry.sport = inet->inet_num; + entry.dport = ntohs(inet->inet_dport); + entry.userlocks = sk->sk_userlocks; + + return inet_diag_bc_run(bc, &entry); +} +EXPORT_SYMBOL_GPL(inet_diag_bc_sk); + static int valid_cc(const void *bc, int len, int cc) { while (len >= 0) { @@ -484,57 +539,29 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct inet_diag_req_v2 *r, + const struct nlattr *bc) { - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - - if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { - struct inet_diag_entry entry; - const struct nlattr *bc = nlmsg_find_attr(cb->nlh, - sizeof(*r), - INET_DIAG_REQ_BYTECODE); - struct inet_sock *inet = inet_sk(sk); - - entry.family = sk->sk_family; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (entry.family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - entry.saddr = np->rcv_saddr.s6_addr32; - entry.daddr = np->daddr.s6_addr32; - } else -#endif - { - entry.saddr = &inet->inet_rcv_saddr; - entry.daddr = &inet->inet_daddr; - } - entry.sport = inet->inet_num; - entry.dport = ntohs(inet->inet_dport); - entry.userlocks = sk->sk_userlocks; + if (!inet_diag_bc_sk(bc, sk)) + return 0; - if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) - return 0; - } - - return inet_csk_diag_fill(sk, skb, r->idiag_ext, + return inet_csk_diag_fill(sk, skb, r, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct inet_diag_req_v2 *r, + const struct nlattr *bc) { - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - - if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { + if (bc != NULL) { struct inet_diag_entry entry; - const struct nlattr *bc = nlmsg_find_attr(cb->nlh, - sizeof(*r), - INET_DIAG_REQ_BYTECODE); entry.family = tw->tw_family; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); @@ -550,11 +577,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, entry.dport = ntohs(tw->tw_dport); entry.userlocks = 0; - if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry)) + if (!inet_diag_bc_run(bc, &entry)) return 0; } - return inet_twsk_diag_fill(tw, skb, r->idiag_ext, + return inet_twsk_diag_fill(tw, skb, r, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -580,8 +607,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_retrans = req->retrans; r->id.idiag_if = sk->sk_bound_dev_if; - r->id.idiag_cookie[0] = (u32)(unsigned long)req; - r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1); + sock_diag_save_cookie(req, r->id.idiag_cookie); tmo = req->expires - jiffies; if (tmo < 0) @@ -596,12 +622,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_wqueue = 0; r->idiag_uid = sock_i_uid(sk); r->idiag_inode = 0; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &inet6_rsk(req)->loc_addr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &inet6_rsk(req)->rmt_addr); + *(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr; + *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr; } #endif nlh->nlmsg_len = skb_tail_pointer(skb) - b; @@ -614,13 +638,13 @@ nlmsg_failure: } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct inet_diag_req_v2 *r, + const struct nlattr *bc) { struct inet_diag_entry entry; - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt; - const struct nlattr *bc = NULL; struct inet_sock *inet = inet_sk(sk); int j, s_j; int reqnum, s_reqnum; @@ -640,9 +664,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (!lopt || !lopt->qlen) goto out; - if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { - bc = nlmsg_find_attr(cb->nlh, sizeof(*r), - INET_DIAG_REQ_BYTECODE); + if (bc != NULL) { entry.sport = inet->inet_num; entry.userlocks = sk->sk_userlocks; } @@ -662,21 +684,20 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (bc) { entry.saddr = -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) (entry.family == AF_INET6) ? inet6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; entry.daddr = -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) (entry.family == AF_INET6) ? inet6_rsk(req)->rmt_addr.s6_addr32 : #endif &ireq->rmt_addr; entry.dport = ntohs(ireq->rmt_port); - if (!inet_diag_bc_run(nla_data(bc), - nla_len(bc), &entry)) + if (!inet_diag_bc_run(bc, &entry)) continue; } @@ -699,19 +720,11 @@ out: return err; } -static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, + struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc) { int i, num; int s_i, s_num; - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - const struct inet_diag_handler *handler; - struct inet_hashinfo *hashinfo; - - handler = inet_diag_lock_handler(cb->nlh->nlmsg_type); - if (IS_ERR(handler)) - goto unlock; - - hashinfo = handler->idiag_hashinfo; s_i = cb->args[1]; s_num = num = cb->args[2]; @@ -736,6 +749,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_listen; + if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_listen; @@ -745,7 +762,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[3] > 0) goto syn_recv; - if (inet_csk_diag_dump(sk, skb, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { spin_unlock_bh(&ilb->lock); goto done; } @@ -754,7 +771,7 @@ syn_recv: if (!(r->idiag_states & TCPF_SYN_RECV)) goto next_listen; - if (inet_diag_dump_reqs(skb, sk, cb) < 0) { + if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) { spin_unlock_bh(&ilb->lock); goto done; } @@ -776,7 +793,7 @@ skip_listen_ht: } if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) - goto unlock; + goto out; for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; @@ -801,13 +818,16 @@ skip_listen_ht: goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) goto next_normal; + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_normal; if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_normal; if (r->id.idiag_dport != inet->inet_dport && r->id.idiag_dport) goto next_normal; - if (inet_csk_diag_dump(sk, skb, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { spin_unlock_bh(lock); goto done; } @@ -823,13 +843,16 @@ next_normal: if (num < s_num) goto next_dying; + if (r->sdiag_family != AF_UNSPEC && + tw->tw_family != r->sdiag_family) + goto next_dying; if (r->id.idiag_sport != tw->tw_sport && r->id.idiag_sport) goto next_dying; if (r->id.idiag_dport != tw->tw_dport && r->id.idiag_dport) goto next_dying; - if (inet_twsk_diag_dump(tw, skb, cb) < 0) { + if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) { spin_unlock_bh(lock); goto done; } @@ -843,12 +866,82 @@ next_dying: done: cb->args[1] = i; cb->args[2] = num; -unlock: +out: + ; +} +EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); + +static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + const struct inet_diag_handler *handler; + + handler = inet_diag_lock_handler(r->sdiag_protocol); + if (!IS_ERR(handler)) + handler->dump(skb, cb, r, bc); inet_diag_unlock_handler(handler); + return skb->len; } -static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlattr *bc = NULL; + int hdrlen = sizeof(struct inet_diag_req_v2); + + if (nlmsg_attrlen(cb->nlh, hdrlen)) + bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); + + return __inet_diag_dump(skb, cb, (struct inet_diag_req_v2 *)NLMSG_DATA(cb->nlh), bc); +} + +static inline int inet_diag_type2proto(int type) +{ + switch (type) { + case TCPDIAG_GETSOCK: + return IPPROTO_TCP; + case DCCPDIAG_GETSOCK: + return IPPROTO_DCCP; + default: + return 0; + } +} + +static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct inet_diag_req *rc = NLMSG_DATA(cb->nlh); + struct inet_diag_req_v2 req; + struct nlattr *bc = NULL; + int hdrlen = sizeof(struct inet_diag_req); + + req.sdiag_family = AF_UNSPEC; /* compatibility */ + req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); + req.idiag_ext = rc->idiag_ext; + req.idiag_states = rc->idiag_states; + req.id = rc->id; + + if (nlmsg_attrlen(cb->nlh, hdrlen)) + bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); + + return __inet_diag_dump(skb, cb, &req, bc); +} + +static int inet_diag_get_exact_compat(struct sk_buff *in_skb, + const struct nlmsghdr *nlh) +{ + struct inet_diag_req *rc = NLMSG_DATA(nlh); + struct inet_diag_req_v2 req; + + req.sdiag_family = rc->idiag_family; + req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); + req.idiag_ext = rc->idiag_ext; + req.idiag_states = rc->idiag_states; + req.id = rc->id; + + return inet_diag_get_exact(in_skb, nlh, &req); +} + +static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) { int hdrlen = sizeof(struct inet_diag_req); @@ -868,28 +961,54 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; } - return netlink_dump_start(idiagnl, skb, nlh, - inet_diag_dump, NULL, 0); + return netlink_dump_start(sock_diag_nlsk, skb, nlh, + inet_diag_dump_compat, NULL, 0); } - return inet_diag_get_exact(skb, nlh); + return inet_diag_get_exact_compat(skb, nlh); } -static DEFINE_MUTEX(inet_diag_mutex); - -static void inet_diag_rcv(struct sk_buff *skb) +static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { - mutex_lock(&inet_diag_mutex); - netlink_rcv_skb(skb, &inet_diag_rcv_msg); - mutex_unlock(&inet_diag_mutex); + int hdrlen = sizeof(struct inet_diag_req_v2); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + if (nlmsg_attrlen(h, hdrlen)) { + struct nlattr *attr; + attr = nlmsg_find_attr(h, hdrlen, + INET_DIAG_REQ_BYTECODE); + if (attr == NULL || + nla_len(attr) < sizeof(struct inet_diag_bc_op) || + inet_diag_bc_audit(nla_data(attr), nla_len(attr))) + return -EINVAL; + } + + return netlink_dump_start(sock_diag_nlsk, skb, h, + inet_diag_dump, NULL, 0); + } + + return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); } +static struct sock_diag_handler inet_diag_handler = { + .family = AF_INET, + .dump = inet_diag_handler_dump, +}; + +static struct sock_diag_handler inet6_diag_handler = { + .family = AF_INET6, + .dump = inet_diag_handler_dump, +}; + int inet_diag_register(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; int err = -EINVAL; - if (type >= INET_DIAG_GETSOCK_MAX) + if (type >= IPPROTO_MAX) goto out; mutex_lock(&inet_diag_table_mutex); @@ -908,7 +1027,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; - if (type >= INET_DIAG_GETSOCK_MAX) + if (type >= IPPROTO_MAX) return; mutex_lock(&inet_diag_table_mutex); @@ -919,7 +1038,7 @@ EXPORT_SYMBOL_GPL(inet_diag_unregister); static int __init inet_diag_init(void) { - const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * + const int inet_diag_table_size = (IPPROTO_MAX * sizeof(struct inet_diag_handler *)); int err = -ENOMEM; @@ -927,25 +1046,35 @@ static int __init inet_diag_init(void) if (!inet_diag_table) goto out; - idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, - inet_diag_rcv, NULL, THIS_MODULE); - if (idiagnl == NULL) - goto out_free_table; - err = 0; + err = sock_diag_register(&inet_diag_handler); + if (err) + goto out_free_nl; + + err = sock_diag_register(&inet6_diag_handler); + if (err) + goto out_free_inet; + + sock_diag_register_inet_compat(inet_diag_rcv_msg_compat); out: return err; -out_free_table: + +out_free_inet: + sock_diag_unregister(&inet_diag_handler); +out_free_nl: kfree(inet_diag_table); goto out; } static void __exit inet_diag_exit(void) { - netlink_kernel_release(idiagnl); + sock_diag_unregister(&inet6_diag_handler); + sock_diag_unregister(&inet_diag_handler); + sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat); kfree(inet_diag_table); } module_init(inet_diag_init); module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index ef7ae6049a51..cc280a3f4f96 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -244,11 +244,11 @@ static void lro_add_frags(struct net_lro_desc *lro_desc, skb->truesize += truesize; skb_frags[0].page_offset += hlen; - skb_frags[0].size -= hlen; + skb_frag_size_sub(&skb_frags[0], hlen); while (tcp_data_len > 0) { *(lro_desc->next_frag) = *skb_frags; - tcp_data_len -= skb_frags->size; + tcp_data_len -= skb_frag_size(skb_frags); lro_desc->next_frag++; skb_frags++; skb_shinfo(skb)->nr_frags++; @@ -400,14 +400,14 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, skb_frags = skb_shinfo(skb)->frags; while (data_len > 0) { *skb_frags = *frags; - data_len -= frags->size; + data_len -= skb_frag_size(frags); skb_frags++; frags++; skb_shinfo(skb)->nr_frags++; } skb_shinfo(skb)->frags[0].page_offset += hdr_len; - skb_shinfo(skb)->frags[0].size -= hdr_len; + skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len); skb->ip_summed = ip_summed; skb->csum = sum; @@ -433,7 +433,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, if (!lro_mgr->get_frag_header || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, (void *)&tcph, &flags, priv)) { - mac_hdr = page_address(frags->page) + frags->page_offset; + mac_hdr = skb_frag_address(frags); goto out1; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 3c8dfa16614d..89168c6351ff 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/kmemcheck.h> #include <linux/slab.h> +#include <linux/module.h> #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> #include <net/ip.h> @@ -183,6 +184,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_daddr = inet->inet_daddr; tw->tw_rcv_saddr = inet->inet_rcv_saddr; tw->tw_bound_dev_if = sk->sk_bound_dev_if; + tw->tw_tos = inet->tos; tw->tw_num = inet->inet_num; tw->tw_state = TCP_TIME_WAIT; tw->tw_substate = state; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 3b34d1c86270..29a07b6c7168 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway) + if (opt->is_strictroute && opt->nexthop != rt->rt_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0e0ab98abc6f..1f23a57aa9e6 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -392,7 +392,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) /* Is this the final fragment? */ if ((flags & IP_MF) == 0) { /* If we already have some bits beyond end - * or have different end, the segment is corrrupted. + * or have different end, the segment is corrupted. */ if (end < qp->q.len || ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) @@ -599,8 +599,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; i<skb_shinfo(head)->nr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; @@ -682,6 +682,42 @@ int ip_defrag(struct sk_buff *skb, u32 user) } EXPORT_SYMBOL(ip_defrag); +struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) +{ + const struct iphdr *iph; + u32 len; + + if (skb->protocol != htons(ETH_P_IP)) + return skb; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return skb; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return skb; + if (!pskb_may_pull(skb, iph->ihl*4)) + return skb; + iph = ip_hdr(skb); + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl * 4)) + return skb; + + if (ip_is_fragment(ip_hdr(skb))) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb) { + if (pskb_trim_rcsum(skb, len)) + return skb; + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + if (ip_defrag(skb, user)) + return NULL; + skb->rxhash = 0; + } + } + return skb; +} +EXPORT_SYMBOL(ip_check_defrag); + #ifdef CONFIG_SYSCTL static int zero; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d7bb94c48345..2b53a1f7abf6 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -46,7 +46,7 @@ #include <net/rtnetlink.h> #include <net/gre.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> @@ -171,7 +171,7 @@ struct pcpu_tstats { unsigned long rx_bytes; unsigned long tx_packets; unsigned long tx_bytes; -}; +} __attribute__((aligned(4*sizeof(unsigned long)))); static struct net_device_stats *ipgre_get_stats(struct net_device *dev) { @@ -729,9 +729,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { - struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); + struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb)); const struct in6_addr *addr6; int addr_type; @@ -799,7 +799,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); @@ -875,7 +875,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((iph->ttl = tiph->ttl) == 0) { if (skb->protocol == htons(ETH_P_IP)) iph->ttl = old_iph->ttl; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; #endif diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ec93335901dd..1e60f7679075 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -568,12 +568,13 @@ void ip_forward_options(struct sk_buff *skb) ) { if (srrptr + 3 > srrspace) break; - if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0) + if (memcmp(&opt->nexthop, &optptr[srrptr-1], 4) == 0) break; } if (srrptr + 3 <= srrspace) { opt->is_changed = 1; ip_rt_get_source(&optptr[srrptr-1], skb, rt); + ip_hdr(skb)->daddr = opt->nexthop; optptr[2] = srrptr+4; } else if (net_ratelimit()) printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); @@ -640,6 +641,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) } if (srrptr <= srrspace) { opt->srr_is_hit = 1; + opt->nexthop = nexthop; opt->is_changed = 1; } return 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8c6563361ab5..ff302bde8890 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -206,7 +206,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) } rcu_read_lock(); - neigh = dst_get_neighbour(dst); + neigh = dst_get_neighbour_noref(dst); if (neigh) { int res = neigh_output(neigh, skb); @@ -319,6 +319,20 @@ int ip_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } +/* + * copy saddr and daddr, possibly using 64bit load/stores + * Equivalent to : + * iph->saddr = fl4->saddr; + * iph->daddr = fl4->daddr; + */ +static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) +{ + BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) != + offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr)); + memcpy(&iph->saddr, &fl4->saddr, + sizeof(fl4->saddr) + sizeof(fl4->daddr)); +} + int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) { struct sock *sk = skb->sk; @@ -381,8 +395,8 @@ packet_routed: iph->frag_off = 0; iph->ttl = ip_select_ttl(inet, &rt->dst); iph->protocol = sk->sk_protocol; - iph->saddr = fl4->saddr; - iph->daddr = fl4->daddr; + ip_copy_addrs(iph, fl4); + /* Transport layer set skb->h.foo itself. */ if (inet_opt && inet_opt->opt.optlen) { @@ -989,13 +1003,13 @@ alloc_new_skb: if (page && (left = PAGE_SIZE - off) > 0) { if (copy >= left) copy = left; - if (page != frag->page) { + if (page != skb_frag_page(frag)) { if (i == MAX_SKB_FRAGS) { err = -EMSGSIZE; goto error; } - get_page(page); skb_fill_page_desc(skb, i, page, off, 0); + skb_frag_ref(skb, i); frag = &skb_shinfo(skb)->frags[i]; } } else if (i < MAX_SKB_FRAGS) { @@ -1015,12 +1029,13 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { + if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag), + offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } cork->off += copy; - frag->size += copy; + skb_frag_size_add(frag, copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1229,7 +1244,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (len > size) len = size; if (skb_can_coalesce(skb, i, page, offset)) { - skb_shinfo(skb)->frags[i-1].size += len; + skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len); } else if (i < MAX_SKB_FRAGS) { get_page(page); skb_fill_page_desc(skb, i, page, offset, len); @@ -1336,8 +1351,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; - iph->saddr = fl4->saddr; - iph->daddr = fl4->daddr; + ip_copy_addrs(iph, fl4); if (opt) { iph->ihl += opt->optlen>>2; @@ -1465,7 +1479,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, * structure to pass arguments. */ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, - struct ip_reply_arg *arg, unsigned int len) + const struct ip_reply_arg *arg, unsigned int len) { struct inet_sock *inet = inet_sk(sk); struct ip_options_data replyopts; @@ -1488,7 +1502,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, } flowi4_init_output(&fl4, arg->bound_dev_if, 0, - RT_TOS(ip_hdr(skb)->tos), + RT_TOS(arg->tos), RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), daddr, rt->rt_spec_dst, @@ -1505,7 +1519,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, with locally disabled BH and that sk cannot be already spinlocked. */ bh_lock_sock(sk); - inet->tos = ip_hdr(skb)->tos; + inet->tos = arg->tos; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8905e92f896a..8aa87c19fa00 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -33,10 +33,11 @@ #include <linux/netfilter.h> #include <linux/route.h> #include <linux/mroute.h> +#include <net/inet_ecn.h> #include <net/route.h> #include <net/xfrm.h> #include <net/compat.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #include <net/transp_v6.h> #endif @@ -54,20 +55,13 @@ /* * SOL_IP control messages. */ +#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb)) static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { - struct in_pktinfo info; - struct rtable *rt = skb_rtable(skb); + struct in_pktinfo info = *PKTINFO_SKB_CB(skb); info.ipi_addr.s_addr = ip_hdr(skb)->daddr; - if (rt) { - info.ipi_ifindex = rt->rt_iif; - info.ipi_spec_dst.s_addr = rt->rt_spec_dst; - } else { - info.ipi_ifindex = 0; - info.ipi_spec_dst.s_addr = 0; - } put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } @@ -514,7 +508,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, sock_owned_by_user(sk)); if (inet->is_icsk) { struct inet_connection_sock *icsk = inet_csk(sk); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET || (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && @@ -525,7 +519,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (opt) icsk->icsk_ext_hdr_len += opt->opt.optlen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) } #endif } @@ -578,8 +572,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { - val &= ~3; - val |= inet->tos & 3; + val &= ~INET_ECN_MASK; + val |= inet->tos & INET_ECN_MASK; } if (inet->tos != val) { inet->tos = val; @@ -961,7 +955,7 @@ mc_msf_out: break; case IP_TRANSPARENT: - if (!capable(CAP_NET_ADMIN)) { + if (!!val && !capable(CAP_NET_RAW) && !capable(CAP_NET_ADMIN)) { err = -EPERM; break; } @@ -991,20 +985,28 @@ e_inval: } /** - * ip_queue_rcv_skb - Queue an skb into sock receive queue + * ipv4_pktinfo_prepare - transfert some info from rtable to skb * @sk: socket * @skb: buffer * - * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option - * is not set, we drop skb dst entry now, while dst cache line is hot. + * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst + * in skb->cb[] before dst drop. + * This way, receiver doesnt make cache line misses to read rtable. */ -int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(struct sk_buff *skb) { - if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) - skb_dst_drop(skb); - return sock_queue_rcv_skb(sk, skb); + struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); + const struct rtable *rt = skb_rtable(skb); + + if (rt) { + pktinfo->ipi_ifindex = rt->rt_iif; + pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst; + } else { + pktinfo->ipi_ifindex = 0; + pktinfo->ipi_spec_dst.s_addr = 0; + } + skb_dst_drop(skb); } -EXPORT_SYMBOL(ip_queue_rcv_skb); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 472a8c4f1dc0..7e4ec9fc2cef 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -54,6 +54,7 @@ #include <linux/delay.h> #include <linux/nfs_fs.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/arp.h> #include <net/ip.h> @@ -252,6 +253,10 @@ static int __init ic_open_devs(void) } } + /* no point in waiting if we could not bring up at least one device */ + if (!ic_first_dev) + goto have_carrier; + /* wait for a carrier on at least one device */ start = jiffies; while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { @@ -762,13 +767,15 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d struct sk_buff *skb; struct bootp_pkt *b; struct iphdr *h; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; /* Allocate packet */ - skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15, + skb = alloc_skb(sizeof(struct bootp_pkt) + hlen + tlen + 15, GFP_KERNEL); if (!skb) return; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); memset(b, 0, sizeof(struct bootp_pkt)); @@ -821,8 +828,13 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d skb->dev = dev; skb->protocol = htons(ETH_P_IP); if (dev_hard_header(skb, dev, ntohs(skb->protocol), - dev->broadcast, dev->dev_addr, skb->len) < 0 || - dev_queue_xmit(skb) < 0) + dev->broadcast, dev->dev_addr, skb->len) < 0) { + kfree_skb(skb); + printk("E"); + return; + } + + if (dev_queue_xmit(skb) < 0) printk("E"); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 378b20b7ca6e..22a199315309 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -148,7 +148,7 @@ struct pcpu_tstats { unsigned long rx_bytes; unsigned long tx_packets; unsigned long tx_bytes; -}; +} __attribute__((aligned(4*sizeof(unsigned long)))); static struct net_device_stats *ipip_get_stats(struct net_device *dev) { @@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip_tunnel_link(ipn, nt); return nt; @@ -301,7 +303,7 @@ static void ipip_tunnel_uninit(struct net_device *dev) struct ipip_net *ipn = net_generic(net, ipip_net_id); if (dev == ipn->fb_tunnel_dev) - rcu_assign_pointer(ipn->tunnels_wc[0], NULL); + RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL); else ipip_tunnel_unlink(ipn, netdev_priv(dev)); dev_put(dev); @@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) static int __net_init ipip_init_net(struct net *net) { struct ipip_net *ipn = net_generic(net, ipip_net_id); + struct ip_tunnel *t; int err; ipn->tunnels[0] = ipn->tunnels_wc; @@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net) if ((err = register_netdev(ipn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(ipn->fb_tunnel_dev); + + strcpy(t->parms.name, ipn->fb_tunnel_dev->name); return 0; err_reg_dev: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 58e879157976..7bc2db6db8d4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -61,6 +61,7 @@ #include <linux/if_arp.h> #include <linux/netfilter_ipv4.h> #include <linux/compat.h> +#include <linux/export.h> #include <net/ipip.h> #include <net/checksum.h> #include <net/netlink.h> @@ -1176,7 +1177,7 @@ static void mrtsock_destruct(struct sock *sk) ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; - rcu_assign_pointer(mrt->mroute_sk, NULL); + RCU_INIT_POINTER(mrt->mroute_sk, NULL); mroute_clean_tables(mrt); } } @@ -1203,7 +1204,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -ENOENT; if (optname != MRT_INIT) { - if (sk != rcu_dereference_raw(mrt->mroute_sk) && + if (sk != rcu_access_pointer(mrt->mroute_sk) && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1230,7 +1231,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; case MRT_DONE: - if (sk != rcu_dereference_raw(mrt->mroute_sk)) + if (sk != rcu_access_pointer(mrt->mroute_sk)) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: @@ -1519,7 +1520,6 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v struct mr_table *mrt; struct vif_device *v; int ct; - LIST_HEAD(list); if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; @@ -1528,10 +1528,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (v->dev == dev) - vif_delete(mrt, ct, 1, &list); + vif_delete(mrt, ct, 1, NULL); } } - unregister_netdevice_many(&list); return NOTIFY_DONE; } diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 929b27bdeb79..4f47e064e262 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -5,6 +5,7 @@ #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/gfp.h> +#include <linux/export.h> #include <net/route.h> #include <net/xfrm.h> #include <net/ip.h> @@ -63,7 +64,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* Change in oif may mean change in hh_len. */ hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && - pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), + 0, GFP_ATOMIC)) return -1; return 0; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1dfc18a03fd4..74dfc9e5211f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -27,7 +27,7 @@ config NF_CONNTRACK_IPV4 config NF_CONNTRACK_PROC_COMPAT bool "proc/sysctl compatibility with old connection tracking" - depends on NF_CONNTRACK_IPV4 + depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4 default y help This option enables /proc and sysctl compatibility with the old @@ -76,11 +76,21 @@ config IP_NF_MATCH_AH config IP_NF_MATCH_ECN tristate '"ecn" match support' depends on NETFILTER_ADVANCED - help - This option adds a `ECN' match, which allows you to match against - the IPv4 and TCP header ECN fields. + select NETFILTER_XT_MATCH_ECN + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MATCH_ECN. + +config IP_NF_MATCH_RPFILTER + tristate '"rpfilter" reverse path filter match support' + depends on NETFILTER_ADVANCED + ---help--- + This option allows you to match packets whose replies would + go out via the interface the packet came in. To compile it as a module, choose M here. If unsure, say N. + The module will be called ipt_rpfilter. config IP_NF_MATCH_TTL tristate '"ttl" match support' @@ -325,7 +335,6 @@ config IP_NF_TARGET_TTL # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' - depends on NETFILTER_ADVANCED help This option adds a `raw' table to iptables. This table is the very first in the netfilter framework and hooks in at the PREROUTING diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index dca2082ec683..213a462b739b 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o -obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o +obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index ffabb2674718..94d45e1f8882 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -404,6 +404,7 @@ __ipq_rcv_skb(struct sk_buff *skb) int status, type, pid, flags; unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; + bool enable_timestamp = false; skblen = skb->len; if (skblen < sizeof(*nlh)) @@ -441,12 +442,13 @@ __ipq_rcv_skb(struct sk_buff *skb) RCV_SKB_FAIL(-EBUSY); } } else { - net_enable_timestamp(); + enable_timestamp = true; peer_pid = pid; } spin_unlock_bh(&queue_lock); - + if (enable_timestamp) + net_enable_timestamp(); status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0)); if (status < 0) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index db8d22db425f..a639967eb727 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -395,7 +395,6 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) config = clusterip_config_init(cipinfo, e->ip.dst.s_addr, dev); if (!config) { - pr_info("cannot allocate config\n"); dev_put(dev); return -ENOMEM; } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 9931152a78b5..2f210c79dc87 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -30,9 +30,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); /* FIXME: Multiple targets. --RR */ static int masquerade_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { pr_debug("bad MAP_IPS.\n"); return -EINVAL; } @@ -49,8 +49,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; struct nf_conn_nat *nat; enum ip_conntrack_info ctinfo; - struct nf_nat_range newrange; - const struct nf_nat_multi_range_compat *mr; + struct nf_nat_ipv4_range newrange; + const struct nf_nat_ipv4_multi_range_compat *mr; const struct rtable *rt; __be32 newsrc; @@ -79,13 +79,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat->masq_index = par->out->ifindex; /* Transfer from original range. */ - newrange = ((struct nf_nat_range) - { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, + newrange = ((struct nf_nat_ipv4_range) + { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, newsrc, newsrc, mr->range[0].min, mr->range[0].max }); /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC); + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); } static int @@ -139,7 +139,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = { .name = "MASQUERADE", .family = NFPROTO_IPV4, .target = masquerade_tg, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = 1 << NF_INET_POST_ROUTING, .checkentry = masquerade_tg_check, diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 6cdb298f1035..b5bfbbabf70d 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); static int netmap_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { + if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { pr_debug("bad MAP_IPS.\n"); return -EINVAL; } @@ -43,8 +43,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 new_ip, netmask; - const struct nf_nat_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_ipv4_range newrange; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_POST_ROUTING || @@ -61,8 +61,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) new_ip = ip_hdr(skb)->saddr & ~netmask; new_ip |= mr->range[0].min_ip & netmask; - newrange = ((struct nf_nat_range) - { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, + newrange = ((struct nf_nat_ipv4_range) + { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, new_ip, new_ip, mr->range[0].min, mr->range[0].max }); @@ -74,7 +74,7 @@ static struct xt_target netmap_tg_reg __read_mostly = { .name = "NETMAP", .family = NFPROTO_IPV4, .target = netmap_tg, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 18a0656505a0..7c0103a5203e 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -28,9 +28,9 @@ MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); /* FIXME: Take multiple ranges --RR */ static int redirect_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { pr_debug("bad MAP_IPS.\n"); return -EINVAL; } @@ -47,8 +47,8 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 newdst; - const struct nf_nat_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_ipv4_range newrange; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_LOCAL_OUT); @@ -76,20 +76,20 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) } /* Transfer from original range. */ - newrange = ((struct nf_nat_range) - { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, + newrange = ((struct nf_nat_ipv4_range) + { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, newdst, newdst, mr->range[0].min, mr->range[0].max }); /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST); + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); } static struct xt_target redirect_tg_reg __read_mostly = { .name = "REDIRECT", .family = NFPROTO_IPV4, .target = redirect_tg, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .checkentry = redirect_tg_check, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 446e0f467a17..ba5756d20165 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -65,7 +65,7 @@ static unsigned int flushtimeout = 10; module_param(flushtimeout, uint, 0600); MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); -static int nflog = 1; +static bool nflog = true; module_param(nflog, bool, 0400); MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); @@ -135,10 +135,8 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) * due to slab allocator restrictions */ n = max(size, nlbufsiz); - skb = alloc_skb(n, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); if (!skb) { - pr_debug("cannot alloc whole buffer %ub!\n", n); - if (n > size) { /* try to allocate only as much as we need for * current packet */ diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c deleted file mode 100644 index 2b57e52c746c..000000000000 --- a/net/ipv4/netfilter/ipt_ecn.c +++ /dev/null @@ -1,127 +0,0 @@ -/* IP tables module for matching the value of the IPv4 and TCP ECN bits - * - * (C) 2002 by Harald Welte <laforge@gnumonks.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/in.h> -#include <linux/ip.h> -#include <net/ip.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/tcp.h> - -#include <linux/netfilter/x_tables.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_ecn.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4"); -MODULE_LICENSE("GPL"); - -static inline bool match_ip(const struct sk_buff *skb, - const struct ipt_ecn_info *einfo) -{ - return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^ - !!(einfo->invert & IPT_ECN_OP_MATCH_IP); -} - -static inline bool match_tcp(const struct sk_buff *skb, - const struct ipt_ecn_info *einfo, - bool *hotdrop) -{ - struct tcphdr _tcph; - const struct tcphdr *th; - - /* In practice, TCP match does this, so can't fail. But let's - * be good citizens. - */ - th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); - if (th == NULL) { - *hotdrop = false; - return false; - } - - if (einfo->operation & IPT_ECN_OP_MATCH_ECE) { - if (einfo->invert & IPT_ECN_OP_MATCH_ECE) { - if (th->ece == 1) - return false; - } else { - if (th->ece == 0) - return false; - } - } - - if (einfo->operation & IPT_ECN_OP_MATCH_CWR) { - if (einfo->invert & IPT_ECN_OP_MATCH_CWR) { - if (th->cwr == 1) - return false; - } else { - if (th->cwr == 0) - return false; - } - } - - return true; -} - -static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct ipt_ecn_info *info = par->matchinfo; - - if (info->operation & IPT_ECN_OP_MATCH_IP) - if (!match_ip(skb, info)) - return false; - - if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { - if (!match_tcp(skb, info, &par->hotdrop)) - return false; - } - - return true; -} - -static int ecn_mt_check(const struct xt_mtchk_param *par) -{ - const struct ipt_ecn_info *info = par->matchinfo; - const struct ipt_ip *ip = par->entryinfo; - - if (info->operation & IPT_ECN_OP_MATCH_MASK) - return -EINVAL; - - if (info->invert & IPT_ECN_OP_MATCH_MASK) - return -EINVAL; - - if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && - (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { - pr_info("cannot match TCP bits in rule for non-tcp packets\n"); - return -EINVAL; - } - - return 0; -} - -static struct xt_match ecn_mt_reg __read_mostly = { - .name = "ecn", - .family = NFPROTO_IPV4, - .match = ecn_mt, - .matchsize = sizeof(struct ipt_ecn_info), - .checkentry = ecn_mt_check, - .me = THIS_MODULE, -}; - -static int __init ecn_mt_init(void) -{ - return xt_register_match(&ecn_mt_reg); -} - -static void __exit ecn_mt_exit(void) -{ - xt_unregister_match(&ecn_mt_reg); -} - -module_init(ecn_mt_init); -module_exit(ecn_mt_exit); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c new file mode 100644 index 000000000000..31371be8174b --- /dev/null +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2011 Florian Westphal <fw@strlen.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <net/ip_fib.h> +#include <net/route.h> + +#include <linux/netfilter/xt_rpfilter.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match"); + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 rpfilter_get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +static bool rpfilter_lookup_reverse(struct flowi4 *fl4, + const struct net_device *dev, u8 flags) +{ + struct fib_result res; + bool dev_match; + struct net *net = dev_net(dev); + int ret __maybe_unused; + + if (fib_lookup(net, fl4, &res)) + return false; + + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL)) + return false; + } + dev_match = false; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + for (ret = 0; ret < res.fi->fib_nhs; ret++) { + struct fib_nh *nh = &res.fi->fib_nh[ret]; + + if (nh->nh_dev == dev) { + dev_match = true; + break; + } + } +#else + if (FIB_RES_DEV(res) == dev) + dev_match = true; +#endif + if (dev_match || flags & XT_RPFILTER_LOOSE) + return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST; + return dev_match; +} + +static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_rpfilter_info *info; + const struct iphdr *iph; + struct flowi4 flow; + bool invert; + + info = par->matchinfo; + invert = info->flags & XT_RPFILTER_INVERT; + + if (par->in->flags & IFF_LOOPBACK) + return true ^ invert; + + iph = ip_hdr(skb); + if (ipv4_is_multicast(iph->daddr)) { + if (ipv4_is_zeronet(iph->saddr)) + return ipv4_is_local_multicast(iph->daddr) ^ invert; + flow.flowi4_iif = 0; + } else { + flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex; + } + + flow.daddr = iph->saddr; + flow.saddr = rpfilter_get_saddr(iph->daddr); + flow.flowi4_oif = 0; + flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; + flow.flowi4_tos = RT_TOS(iph->tos); + flow.flowi4_scope = RT_SCOPE_UNIVERSE; + + return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert; +} + +static int rpfilter_check(const struct xt_mtchk_param *par) +{ + const struct xt_rpfilter_info *info = par->matchinfo; + unsigned int options = ~XT_RPFILTER_OPTION_MASK; + if (info->flags & options) { + pr_info("unknown options encountered"); + return -EINVAL; + } + + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "raw") != 0) { + pr_info("match only valid in the \'raw\' " + "or \'mangle\' tables, not \'%s\'.\n", par->table); + return -EINVAL; + } + + return 0; +} + +static struct xt_match rpfilter_mt_reg __read_mostly = { + .name = "rpfilter", + .family = NFPROTO_IPV4, + .checkentry = rpfilter_check, + .match = rpfilter_mt, + .matchsize = sizeof(struct xt_rpfilter_info), + .hooks = (1 << NF_INET_PRE_ROUTING), + .me = THIS_MODULE +}; + +static int __init rpfilter_mt_init(void) +{ + return xt_register_match(&rpfilter_mt_reg); +} + +static void __exit rpfilter_mt_exit(void) +{ + xt_unregister_match(&rpfilter_mt_reg); +} + +module_init(rpfilter_mt_init); +module_exit(rpfilter_mt_exit); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c37641e819f2..0e58f09e59fb 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -52,7 +52,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb, static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ -static int forward = NF_ACCEPT; +static bool forward = NF_ACCEPT; module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 5585980fce2e..9682b36df38c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -21,6 +21,7 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_acct.h> #include <linux/rculist_nulls.h> +#include <linux/export.h> struct ct_iter_state { struct seq_net_private p; diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 703f366fd235..7b22382ff0e9 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -70,14 +70,14 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_amanda_fini(void) { - rcu_assign_pointer(nf_nat_amanda_hook, NULL); + RCU_INIT_POINTER(nf_nat_amanda_hook, NULL); synchronize_rcu(); } static int __init nf_nat_amanda_init(void) { BUG_ON(nf_nat_amanda_hook != NULL); - rcu_assign_pointer(nf_nat_amanda_hook, help); + RCU_INIT_POINTER(nf_nat_amanda_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 3346de5d94d0..a708933dc230 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -30,7 +30,6 @@ #include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l3proto.h> -#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_zones.h> static DEFINE_SPINLOCK(nf_nat_lock); @@ -57,7 +56,7 @@ hash_by_src(const struct net *net, u16 zone, /* Original src, to ensure we map it consistently if poss. */ hash = jhash_3words((__force u32)tuple->src.u3.ip, (__force u32)tuple->src.u.all ^ zone, - tuple->dst.protonum, 0); + tuple->dst.protonum, nf_conntrack_hash_rnd); return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } @@ -82,14 +81,14 @@ EXPORT_SYMBOL(nf_nat_used_tuple); * that meet the constraints of range. */ static int in_range(const struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range) + const struct nf_nat_ipv4_range *range) { const struct nf_nat_protocol *proto; int ret = 0; /* If we are supposed to map IPs, then we must be in the range specified, otherwise let this drag us onto a new src IP. */ - if (range->flags & IP_NAT_RANGE_MAP_IPS) { + if (range->flags & NF_NAT_RANGE_MAP_IPS) { if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) return 0; @@ -97,8 +96,8 @@ in_range(const struct nf_conntrack_tuple *tuple, rcu_read_lock(); proto = __nf_nat_proto_find(tuple->dst.protonum); - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || - proto->in_range(tuple, IP_NAT_MANIP_SRC, + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || + proto->in_range(tuple, NF_NAT_MANIP_SRC, &range->min, &range->max)) ret = 1; rcu_read_unlock(); @@ -123,7 +122,7 @@ static int find_appropriate_src(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, - const struct nf_nat_range *range) + const struct nf_nat_ipv4_range *range) { unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; @@ -157,7 +156,7 @@ find_appropriate_src(struct net *net, u16 zone, */ static void find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) { @@ -166,10 +165,10 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, u_int32_t minip, maxip, j; /* No IP mapping? Do nothing. */ - if (!(range->flags & IP_NAT_RANGE_MAP_IPS)) + if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) return; - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) var_ipp = &tuple->src.u3.ip; else var_ipp = &tuple->dst.u3.ip; @@ -189,7 +188,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, - range->flags & IP_NAT_RANGE_PERSISTENT ? + range->flags & NF_NAT_RANGE_PERSISTENT ? 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); @@ -204,7 +203,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, static void get_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig_tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { @@ -219,8 +218,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, This is only required for source (ie. NAT/masq) mappings. So far, we don't do local source mappings, so multiple manips not an issue. */ - if (maniptype == IP_NAT_MANIP_SRC && - !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { + if (maniptype == NF_NAT_MANIP_SRC && + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { /* try the original tuple first */ if (in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -247,8 +246,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, proto = __nf_nat_proto_find(orig_tuple->dst.protonum); /* Only bother mapping if it's not already in range and unique */ - if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (proto->in_range(tuple, maniptype, &range->min, &range->max) && (range->min.all == range->max.all || @@ -267,7 +266,7 @@ out: unsigned int nf_nat_setup_info(struct nf_conn *ct, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype) { struct net *net = nf_ct_net(ct); @@ -284,8 +283,8 @@ nf_nat_setup_info(struct nf_conn *ct, } } - NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC || - maniptype == IP_NAT_MANIP_DST); + NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || + maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); /* What we've got will look like inverse of reply. Normally @@ -306,19 +305,19 @@ nf_nat_setup_info(struct nf_conn *ct, nf_conntrack_alter_reply(ct, &reply); /* Non-atomic: we own this at the moment. */ - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) ct->status |= IPS_SRC_NAT; else ct->status |= IPS_DST_NAT; } - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { unsigned int srchash; srchash = hash_by_src(net, nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); - /* nf_conntrack_alter_reply might re-allocate exntension aera */ + /* nf_conntrack_alter_reply might re-allocate extension area */ nat = nfct_nat(ct); nat->ct = ct; hlist_add_head_rcu(&nat->bysource, @@ -327,7 +326,7 @@ nf_nat_setup_info(struct nf_conn *ct, } /* It's done. */ - if (maniptype == IP_NAT_MANIP_DST) + if (maniptype == NF_NAT_MANIP_DST) ct->status |= IPS_DST_NAT_DONE; else ct->status |= IPS_SRC_NAT_DONE; @@ -361,7 +360,7 @@ manip_pkt(u_int16_t proto, iph = (void *)skb->data + iphdroff; - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); iph->saddr = target->src.u3.ip; } else { @@ -381,7 +380,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct, unsigned long statusbit; enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); - if (mtype == IP_NAT_MANIP_SRC) + if (mtype == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; @@ -414,8 +413,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, struct icmphdr icmp; struct iphdr ip; } *inside; - const struct nf_conntrack_l4proto *l4proto; - struct nf_conntrack_tuple inner, target; + struct nf_conntrack_tuple target; int hdrlen = ip_hdrlen(skb); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -447,7 +445,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, return 0; } - if (manip == IP_NAT_MANIP_SRC) + if (manip == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; @@ -463,16 +461,6 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, "dir %s\n", skb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); - /* rcu_read_lock()ed by nf_hook_slow */ - l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - - if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), - (hdrlen + - sizeof(struct icmphdr) + inside->ip.ihl * 4), - (u_int16_t)AF_INET, inside->ip.protocol, - &inner, l3proto, l4proto)) - return 0; - /* Change inner back to look like incoming packet. We do the opposite manip on this hook to normal, because it might not pass all hooks (locally-generated ICMP). Consider incoming @@ -514,7 +502,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) ret = -EBUSY; goto out; } - rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); + RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto); out: spin_unlock_bh(&nf_nat_lock); return ret; @@ -525,7 +513,7 @@ EXPORT_SYMBOL(nf_nat_protocol_register); void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) { spin_lock_bh(&nf_nat_lock); - rcu_assign_pointer(nf_nat_protos[proto->protonum], + RCU_INIT_POINTER(nf_nat_protos[proto->protonum], &nf_nat_unknown_protocol); spin_unlock_bh(&nf_nat_lock); synchronize_rcu(); @@ -575,26 +563,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static const struct nf_nat_protocol * -nf_nat_proto_find_get(u_int8_t protonum) -{ - const struct nf_nat_protocol *p; - - rcu_read_lock(); - p = __nf_nat_proto_find(protonum); - if (!try_module_get(p->me)) - p = &nf_nat_unknown_protocol; - rcu_read_unlock(); - - return p; -} - -static void -nf_nat_proto_put(const struct nf_nat_protocol *p) -{ - module_put(p->me); -} - static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, @@ -602,7 +570,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { static int nfnetlink_parse_nat_proto(struct nlattr *attr, const struct nf_conn *ct, - struct nf_nat_range *range) + struct nf_nat_ipv4_range *range) { struct nlattr *tb[CTA_PROTONAT_MAX+1]; const struct nf_nat_protocol *npt; @@ -612,21 +580,23 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, if (err < 0) return err; - npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); + rcu_read_lock(); + npt = __nf_nat_proto_find(nf_ct_protonum(ct)); if (npt->nlattr_to_range) err = npt->nlattr_to_range(tb, range); - nf_nat_proto_put(npt); + rcu_read_unlock(); return err; } static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { [CTA_NAT_MINIP] = { .type = NLA_U32 }, [CTA_NAT_MAXIP] = { .type = NLA_U32 }, + [CTA_NAT_PROTO] = { .type = NLA_NESTED }, }; static int nfnetlink_parse_nat(const struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_range *range) + const struct nf_conn *ct, struct nf_nat_ipv4_range *range) { struct nlattr *tb[CTA_NAT_MAX+1]; int err; @@ -646,7 +616,7 @@ nfnetlink_parse_nat(const struct nlattr *nat, range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); if (range->min_ip) - range->flags |= IP_NAT_RANGE_MAP_IPS; + range->flags |= NF_NAT_RANGE_MAP_IPS; if (!tb[CTA_NAT_PROTO]) return 0; @@ -663,7 +633,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; if (nfnetlink_parse_nat(attr, ct, &range) < 0) return -EINVAL; @@ -736,10 +706,10 @@ static int __init nf_nat_init(void) /* Sew in builtin protocols. */ spin_lock_bh(&nf_nat_lock); for (i = 0; i < MAX_IP_NAT_PROTO; i++) - rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); - rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); - rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); - rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); + RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol); + RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); + RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); + RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); spin_unlock_bh(&nf_nat_lock); /* Initialize fake conntrack so that NAT will skip it */ @@ -748,12 +718,12 @@ static int __init nf_nat_init(void) l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); BUG_ON(nf_nat_seq_adjust_hook != NULL); - rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); - rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); BUG_ON(nf_ct_nat_offset != NULL); - rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset); + RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); return 0; cleanup_extend: @@ -766,9 +736,9 @@ static void __exit nf_nat_cleanup(void) unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); - rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); - rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); - rcu_assign_pointer(nf_ct_nat_offset, NULL); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); synchronize_net(); } diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index dc73abb3fe27..e462a957d080 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -113,14 +113,14 @@ out: static void __exit nf_nat_ftp_fini(void) { - rcu_assign_pointer(nf_nat_ftp_hook, NULL); + RCU_INIT_POINTER(nf_nat_ftp_hook, NULL); synchronize_rcu(); } static int __init nf_nat_ftp_init(void) { BUG_ON(nf_nat_ftp_hook != NULL); - rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp); + RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 790f3160e012..dc1dd912baf4 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -398,7 +398,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_q931_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ nf_nat_follow_master(new, this); @@ -409,16 +409,16 @@ static void ip_nat_q931_expect(struct nf_conn *new, BUG_ON(new->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; - nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); range.min = range.max = this->saved_proto; range.min_ip = range.max_ip = new->master->tuplehash[!this->dir].tuple.src.u3.ip; - nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } /****************************************************************************/ @@ -496,21 +496,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_callforwarding_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; /* This must be a fresh one. */ BUG_ON(new->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; - nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); range.min = range.max = this->saved_proto; range.min_ip = range.max_ip = this->saved_ip; - nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } /****************************************************************************/ @@ -581,30 +581,30 @@ static int __init init(void) BUG_ON(nat_callforwarding_hook != NULL); BUG_ON(nat_q931_hook != NULL); - rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); - rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); - rcu_assign_pointer(set_sig_addr_hook, set_sig_addr); - rcu_assign_pointer(set_ras_addr_hook, set_ras_addr); - rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp); - rcu_assign_pointer(nat_t120_hook, nat_t120); - rcu_assign_pointer(nat_h245_hook, nat_h245); - rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding); - rcu_assign_pointer(nat_q931_hook, nat_q931); + RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr); + RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr); + RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr); + RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp); + RCU_INIT_POINTER(nat_t120_hook, nat_t120); + RCU_INIT_POINTER(nat_h245_hook, nat_h245); + RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding); + RCU_INIT_POINTER(nat_q931_hook, nat_q931); return 0; } /****************************************************************************/ static void __exit fini(void) { - rcu_assign_pointer(set_h245_addr_hook, NULL); - rcu_assign_pointer(set_h225_addr_hook, NULL); - rcu_assign_pointer(set_sig_addr_hook, NULL); - rcu_assign_pointer(set_ras_addr_hook, NULL); - rcu_assign_pointer(nat_rtp_rtcp_hook, NULL); - rcu_assign_pointer(nat_t120_hook, NULL); - rcu_assign_pointer(nat_h245_hook, NULL); - rcu_assign_pointer(nat_callforwarding_hook, NULL); - rcu_assign_pointer(nat_q931_hook, NULL); + RCU_INIT_POINTER(set_h245_addr_hook, NULL); + RCU_INIT_POINTER(set_h225_addr_hook, NULL); + RCU_INIT_POINTER(set_sig_addr_hook, NULL); + RCU_INIT_POINTER(set_ras_addr_hook, NULL); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL); + RCU_INIT_POINTER(nat_t120_hook, NULL); + RCU_INIT_POINTER(nat_h245_hook, NULL); + RCU_INIT_POINTER(nat_callforwarding_hook, NULL); + RCU_INIT_POINTER(nat_q931_hook, NULL); synchronize_rcu(); } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index ebc5f8894f99..af65958f6308 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -253,12 +253,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, struct udphdr *udph; int datalen, oldlen; - /* UDP helpers might accidentally mangle the wrong packet */ - iph = ip_hdr(skb); - if (skb->len < iph->ihl*4 + sizeof(*udph) + - match_offset + match_len) - return 0; - if (!skb_make_writable(skb, skb->len)) return 0; @@ -430,22 +424,22 @@ nf_nat_seq_adjust(struct sk_buff *skb, void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); range.min = range.max = exp->saved_proto; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } EXPORT_SYMBOL(nf_nat_follow_master); diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 535e1a802356..979ae165f4ef 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -75,14 +75,14 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_irc_fini(void) { - rcu_assign_pointer(nf_nat_irc_hook, NULL); + RCU_INIT_POINTER(nf_nat_irc_hook, NULL); synchronize_rcu(); } static int __init nf_nat_irc_init(void) { BUG_ON(nf_nat_irc_hook != NULL); - rcu_assign_pointer(nf_nat_irc_hook, help); + RCU_INIT_POINTER(nf_nat_irc_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 4c060038d29f..c273d58980ae 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -47,7 +47,7 @@ static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_tuple t; const struct nf_ct_pptp_master *ct_pptp_info; const struct nf_nat_pptp *nat_pptp_info; - struct nf_nat_range range; + struct nf_nat_ipv4_range range; ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; @@ -88,24 +88,24 @@ static void pptp_nat_expected(struct nf_conn *ct, BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; if (exp->dir == IP_CT_DIR_ORIGINAL) { - range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; range.min = range.max = exp->saved_proto; } - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; if (exp->dir == IP_CT_DIR_REPLY) { - range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; range.min = range.max = exp->saved_proto; } - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } /* outbound packets == from PNS to PAC */ @@ -282,25 +282,25 @@ static int __init nf_nat_helper_pptp_init(void) nf_nat_need_gre(); BUG_ON(nf_nat_pptp_hook_outbound != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); BUG_ON(nf_nat_pptp_hook_inbound != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); BUG_ON(nf_nat_pptp_hook_exp_gre != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); BUG_ON(nf_nat_pptp_hook_expectfn != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected); + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected); return 0; } static void __exit nf_nat_helper_pptp_fini(void) { - rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL); - rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL); - rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL); - rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL); synchronize_rcu(); } diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index f52d41ea0690..9993bc93e102 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -12,6 +12,7 @@ #include <linux/ip.h> #include <linux/netfilter.h> +#include <linux/export.h> #include <net/secure_seq.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> @@ -25,7 +26,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, { __be16 port; - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) port = tuple->src.u.all; else port = tuple->dst.u.all; @@ -36,7 +37,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct, u_int16_t *rover) @@ -45,15 +46,15 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, __be16 *portptr; u_int16_t off; - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) portptr = &tuple->src.u.all; else portptr = &tuple->dst.u.all; /* If no range specified... */ - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { /* If it's dst rewrite, can't change port */ - if (maniptype == IP_NAT_MANIP_DST) + if (maniptype == NF_NAT_MANIP_DST) return; if (ntohs(*portptr) < 1024) { @@ -74,9 +75,9 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.all) - min + 1; } - if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, - maniptype == IP_NAT_MANIP_SRC + maniptype == NF_NAT_MANIP_SRC ? tuple->dst.u.all : tuple->src.u.all); else @@ -86,7 +87,7 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, *portptr = htons(min + off % range_size); if (++i != range_size && nf_nat_used_tuple(tuple, ct)) continue; - if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) *rover = off; return; } @@ -95,31 +96,19 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) -int nf_nat_proto_range_to_nlattr(struct sk_buff *skb, - const struct nf_nat_range *range) -{ - NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all); - NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all); - return 0; - -nla_put_failure: - return -1; -} -EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); - int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range *range) + struct nf_nat_ipv4_range *range) { if (tb[CTA_PROTONAT_PORT_MIN]) { range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); range->max.all = range->min.tcp.port; - range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[CTA_PROTONAT_PORT_MAX]) { range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); - range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } return 0; } -EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr); +EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); #endif diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c index 570faf2667b2..3f67138d187c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c @@ -24,7 +24,7 @@ static u_int16_t dccp_port_rover; static void dccp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -54,7 +54,7 @@ dccp_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct dccp_hdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { oldip = iph->saddr; newip = tuple->src.u3.ip; newport = tuple->src.u.dccp.port; @@ -80,12 +80,10 @@ dccp_manip_pkt(struct sk_buff *skb, static const struct nf_nat_protocol nf_nat_protocol_dccp = { .protonum = IPPROTO_DCCP, - .me = THIS_MODULE, .manip_pkt = dccp_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = dccp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index bc8d83a31c73..46ba0b9ab985 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); /* generate unique tuple ... */ static void gre_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -52,12 +52,12 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, if (!ct->master) return; - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) keyptr = &tuple->src.u.gre.key; else keyptr = &tuple->dst.u.gre.key; - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { pr_debug("%p: NATing GRE PPTP\n", ct); min = 1; range_size = 0xffff; @@ -99,7 +99,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ - if (maniptype != IP_NAT_MANIP_DST) + if (maniptype != NF_NAT_MANIP_DST) return true; switch (greh->version) { case GRE_VERSION_1701: @@ -119,12 +119,10 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, static const struct nf_nat_protocol gre = { .protonum = IPPROTO_GRE, - .me = THIS_MODULE, .manip_pkt = gre_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = gre_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 5744c3ec847c..b35172851bae 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/export.h> #include <linux/ip.h> #include <linux/icmp.h> @@ -29,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, static void icmp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -39,7 +40,7 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; /* If no range specified... */ - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) range_size = 0xFFFF; for (i = 0; ; ++id) { @@ -73,12 +74,10 @@ icmp_manip_pkt(struct sk_buff *skb, const struct nf_nat_protocol nf_nat_protocol_icmp = { .protonum = IPPROTO_ICMP, - .me = THIS_MODULE, .manip_pkt = icmp_manip_pkt, .in_range = icmp_in_range, .unique_tuple = icmp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index 756331d42661..3cce9b6c1c29 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/ip.h> #include <linux/sctp.h> +#include <linux/module.h> #include <net/sctp/checksum.h> #include <net/netfilter/nf_nat_protocol.h> @@ -18,7 +19,7 @@ static u_int16_t nf_sctp_port_rover; static void sctp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -45,7 +46,7 @@ sctp_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct sctphdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ oldip = iph->saddr; newip = tuple->src.u3.ip; @@ -69,12 +70,10 @@ sctp_manip_pkt(struct sk_buff *skb, static const struct nf_nat_protocol nf_nat_protocol_sctp = { .protonum = IPPROTO_SCTP, - .me = THIS_MODULE, .manip_pkt = sctp_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = sctp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index aa460a595d5d..9fb4b4e72bbf 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/export.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -22,7 +23,7 @@ static u_int16_t tcp_port_rover; static void tcp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -54,7 +55,7 @@ tcp_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct tcphdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ oldip = iph->saddr; newip = tuple->src.u3.ip; @@ -81,12 +82,10 @@ tcp_manip_pkt(struct sk_buff *skb, const struct nf_nat_protocol nf_nat_protocol_tcp = { .protonum = IPPROTO_TCP, - .me = THIS_MODULE, .manip_pkt = tcp_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = tcp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index dfe65c7e2925..9883336e628f 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -7,6 +7,7 @@ */ #include <linux/types.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/ip.h> #include <linux/udp.h> @@ -21,7 +22,7 @@ static u_int16_t udp_port_rover; static void udp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -46,7 +47,7 @@ udp_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct udphdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ oldip = iph->saddr; newip = tuple->src.u3.ip; @@ -72,12 +73,10 @@ udp_manip_pkt(struct sk_buff *skb, const struct nf_nat_protocol nf_nat_protocol_udp = { .protonum = IPPROTO_UDP, - .me = THIS_MODULE, .manip_pkt = udp_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = udp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c index 3cc8c8af39ef..d24d10a7beb2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c @@ -13,6 +13,7 @@ #include <linux/udp.h> #include <linux/netfilter.h> +#include <linux/module.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_protocol.h> @@ -20,7 +21,7 @@ static u_int16_t udplite_port_rover; static void udplite_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -46,7 +47,7 @@ udplite_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct udphdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ oldip = iph->saddr; newip = tuple->src.u3.ip; @@ -71,12 +72,10 @@ udplite_manip_pkt(struct sk_buff *skb, static const struct nf_nat_protocol nf_nat_protocol_udplite = { .protonum = IPPROTO_UDPLITE, - .me = THIS_MODULE, .manip_pkt = udplite_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = udplite_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index a50f2bc1c732..e0afe8112b1c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, } static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -46,7 +46,6 @@ unknown_manip_pkt(struct sk_buff *skb, } const struct nf_nat_protocol nf_nat_unknown_protocol = { - /* .me isn't set: getting a ref to this cannot fail. */ .manip_pkt = unknown_manip_pkt, .in_range = unknown_in_range, .unique_tuple = unknown_unique_tuple, diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 733c9abc1cbd..d2a9dc314e0e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -44,7 +44,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || par->hooknum == NF_INET_LOCAL_IN); @@ -56,7 +56,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) ctinfo == IP_CT_RELATED_REPLY)); NF_CT_ASSERT(par->out != NULL); - return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); + return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC); } static unsigned int @@ -64,7 +64,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_LOCAL_OUT); @@ -74,12 +74,12 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); + return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST); } static int ipt_snat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -91,7 +91,7 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par) static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -105,13 +105,13 @@ static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { /* Force range to this IP; let proto decide mapping for - per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED). */ - struct nf_nat_range range; + struct nf_nat_ipv4_range range; range.flags = 0; pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, - HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); @@ -140,7 +140,7 @@ int nf_nat_rule_find(struct sk_buff *skb, static struct xt_target ipt_snat_reg __read_mostly = { .name = "SNAT", .target = ipt_snat_target, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), .checkentry = ipt_snat_checkentry, @@ -150,7 +150,7 @@ static struct xt_target ipt_snat_reg __read_mostly = { static struct xt_target ipt_dnat_reg __read_mostly = { .name = "DNAT", .target = ipt_dnat_target, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .checkentry = ipt_dnat_checkentry, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index e40cf7816fdb..d0319f96269f 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -249,25 +249,25 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) static void ip_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); range.min = range.max = exp->saved_proto; range.min_ip = range.max_ip = exp->saved_ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); /* Change src to where master sends to, but only if the connection * actually came from the same source. */ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); } } @@ -528,13 +528,13 @@ err1: static void __exit nf_nat_sip_fini(void) { - rcu_assign_pointer(nf_nat_sip_hook, NULL); - rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL); - rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_session_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_media_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); synchronize_rcu(); } @@ -547,13 +547,13 @@ static int __init nf_nat_sip_init(void) BUG_ON(nf_nat_sdp_port_hook != NULL); BUG_ON(nf_nat_sdp_session_hook != NULL); BUG_ON(nf_nat_sdp_media_hook != NULL); - rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); - rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); - rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); - rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); - rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); - rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session); - rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media); + RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip); + RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); + RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect); + RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); + RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port); + RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session); + RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 076b7c8c4aa4..2133c30a4a5f 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -400,11 +400,8 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, *len = 0; *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); - if (*octets == NULL) { - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); + if (*octets == NULL) return 0; - } ptr = *octets; while (ctx->pointer < eoc) { @@ -451,11 +448,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, return 0; *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); - if (*oid == NULL) { - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); + if (*oid == NULL) return 0; - } optr = *oid; @@ -728,8 +722,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); if (*obj == NULL) { kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } (*obj)->syntax.l[0] = l; @@ -744,8 +736,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, if (*obj == NULL) { kfree(p); kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } memcpy((*obj)->syntax.c, p, len); @@ -759,8 +749,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); if (*obj == NULL) { kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } if (!asn1_null_decode(ctx, end)) { @@ -780,8 +768,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, if (*obj == NULL) { kfree(lp); kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } memcpy((*obj)->syntax.ul, lp, len); @@ -801,8 +787,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, if (*obj == NULL) { kfree(p); kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } memcpy((*obj)->syntax.uc, p, len); @@ -819,8 +803,6 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); if (*obj == NULL) { kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); return 0; } (*obj)->syntax.ul[0] = ul; @@ -1310,7 +1292,7 @@ static int __init nf_nat_snmp_basic_init(void) int ret = 0; BUG_ON(nf_nat_snmp_hook != NULL); - rcu_assign_pointer(nf_nat_snmp_hook, help); + RCU_INIT_POINTER(nf_nat_snmp_hook, help); ret = nf_conntrack_helper_register(&snmp_trap_helper); if (ret < 0) { @@ -1322,7 +1304,7 @@ static int __init nf_nat_snmp_basic_init(void) static void __exit nf_nat_snmp_basic_fini(void) { - rcu_assign_pointer(nf_nat_snmp_hook, NULL); + RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index a6e606e84820..3828a4229822 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum, return ret; } else pr_debug("Already setup manip %s for ct %p\n", - maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); break; @@ -284,7 +284,7 @@ static int __init nf_nat_standalone_init(void) #ifdef CONFIG_XFRM BUG_ON(ip_nat_decode_session != NULL); - rcu_assign_pointer(ip_nat_decode_session, nat_decode_session); + RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session); #endif ret = nf_nat_rule_init(); if (ret < 0) { @@ -302,7 +302,7 @@ static int __init nf_nat_standalone_init(void) nf_nat_rule_cleanup(); cleanup_decode_session: #ifdef CONFIG_XFRM - rcu_assign_pointer(ip_nat_decode_session, NULL); + RCU_INIT_POINTER(ip_nat_decode_session, NULL); synchronize_net(); #endif return ret; @@ -313,7 +313,7 @@ static void __exit nf_nat_standalone_fini(void) nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); nf_nat_rule_cleanup(); #ifdef CONFIG_XFRM - rcu_assign_pointer(ip_nat_decode_session, NULL); + RCU_INIT_POINTER(ip_nat_decode_session, NULL); synchronize_net(); #endif /* Conntrack caches are unregistered in nf_conntrack_cleanup */ diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 7274a43c7a12..a2901bf829c0 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -36,14 +36,14 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_tftp_fini(void) { - rcu_assign_pointer(nf_nat_tftp_hook, NULL); + RCU_INIT_POINTER(nf_nat_tftp_hook, NULL); synchronize_rcu(); } static int __init nf_nat_tftp_init(void) { BUG_ON(nf_nat_tftp_hook != NULL); - rcu_assign_pointer(nf_nat_tftp_hook, help); + RCU_INIT_POINTER(nf_nat_tftp_hook, help); return 0; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 39b403f854c6..43d4c3b22369 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -39,6 +39,7 @@ #include <net/protocol.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> +#include <linux/export.h> #include <net/sock.h> #include <net/ping.h> #include <net/udp.h> @@ -338,7 +339,6 @@ void ping_err(struct sk_buff *skb, u32 info) sk = ping_v4_lookup(net, iph->daddr, iph->saddr, ntohs(icmph->un.echo.id), skb->dev->ifindex); if (sk == NULL) { - ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); pr_debug("no socket, dropping\n"); return; /* No socket for error */ } @@ -678,7 +678,6 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); if (sock_queue_rcv_skb(sk, skb) < 0) { - ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_INERRORS); kfree_skb(skb); pr_debug("ping_queue_rcv_skb -> failed\n"); return -1; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4bfad5da94f4..3569d8ecaeac 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -42,6 +42,7 @@ #include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/export.h> #include <net/sock.h> #include <net/raw.h> @@ -55,17 +56,17 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) local_bh_disable(); orphans = percpu_counter_sum_positive(&tcp_orphan_count); - sockets = percpu_counter_sum_positive(&tcp_sockets_allocated); + sockets = proto_sockets_allocated_sum_positive(&tcp_prot); local_bh_enable(); socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, tcp_death_row.tw_count, sockets, - atomic_long_read(&tcp_memory_allocated)); + proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), - atomic_long_read(&udp_memory_allocated)); + proto_memory_allocated(&udp_prot)); seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", @@ -287,7 +288,7 @@ static void icmpmsg_put(struct seq_file *seq) count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); + val = atomic_long_read(&net->mib.icmpmsg_statistics->mibs[i]); if (val) { type[count] = i; vals[count++] = val; @@ -306,6 +307,7 @@ static void icmp_put(struct seq_file *seq) { int i; struct net *net = seq->private; + atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors"); for (i=0; icmpmibmap[i].name != NULL; i++) @@ -318,15 +320,13 @@ static void icmp_put(struct seq_file *seq) snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, - icmpmibmap[i].index)); + atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, - icmpmibmap[i].index | 0x100)); + atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } /* diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 61714bd52925..3ccda5ae8a27 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -48,6 +48,7 @@ #include <linux/errno.h> #include <linux/aio.h> #include <linux/kernel.h> +#include <linux/export.h> #include <linux/spinlock.h> #include <linux/sockios.h> #include <linux/socket.h> @@ -291,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) { /* Charge it to the socket. */ - if (ip_queue_rcv_skb(sk, skb) < 0) { + ipv4_pktinfo_prepare(skb); + if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } @@ -326,6 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, unsigned int iphlen; int err; struct rtable *rt = *rtp; + int hlen, tlen; if (length > rt->dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -335,12 +338,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, if (flags&MSG_PROBE) goto out; + hlen = LL_RESERVED_SPACE(rt->dst.dev); + tlen = rt->dst.dev->needed_tailroom; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, + length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); + skb_reserve(skb, hlen); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 075212e41b83..bcacf54e5418 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -91,6 +91,7 @@ #include <linux/rcupdate.h> #include <linux/times.h> #include <linux/slab.h> +#include <linux/prefetch.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/protocol.h> @@ -108,11 +109,10 @@ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif -#include <net/atmclip.h> #include <net/secure_seq.h> #define RT_FL_TOS(oldflp4) \ - ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) + ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) #define IP_MAX_MTU 0xFFF0 @@ -120,7 +120,7 @@ static int ip_rt_max_size; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; -static int ip_rt_gc_interval __read_mostly = 60 * HZ; +static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; @@ -132,6 +132,10 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; +static int redirect_genid; + +static struct delayed_work expires_work; +static unsigned long expires_ljiffies; /* * Interface to generic destination cache. @@ -139,7 +143,7 @@ static int rt_chain_length_max __read_mostly = 20; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); -static unsigned int ipv4_default_mtu(const struct dst_entry *dst); +static unsigned int ipv4_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); @@ -194,7 +198,7 @@ static struct dst_ops ipv4_dst_ops = { .gc = rt_garbage_collect, .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, - .default_mtu = ipv4_default_mtu, + .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, @@ -324,7 +328,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { - if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)) + if (!rcu_access_pointer(rt_hash_table[st->bucket].chain)) continue; rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); @@ -350,7 +354,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, do { if (--st->bucket < 0) return NULL; - } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)); + } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain)); rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); } @@ -417,9 +421,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) else { struct rtable *r = v; struct neighbour *n; - int len; + int len, HHUptod; + + rcu_read_lock(); + n = dst_get_neighbour_noref(&r->dst); + HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; + rcu_read_unlock(); - n = dst_get_neighbour(&r->dst); seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", r->dst.dev ? r->dst.dev->name : "*", @@ -433,7 +441,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dst_metric(&r->dst, RTAX_RTTVAR)), r->rt_key_tos, -1, - (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0, + HHUptod, r->rt_spec_dst, &len); seq_printf(seq, "%*s\n", 127 - len, ""); @@ -761,7 +769,7 @@ static void rt_do_flush(struct net *net, int process_context) if (process_context && need_resched()) cond_resched(); - rth = rcu_dereference_raw(rt_hash_table[i].chain); + rth = rcu_access_pointer(rt_hash_table[i].chain); if (!rth) continue; @@ -826,6 +834,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) return ONE; } +static void rt_check_expire(void) +{ + static unsigned int rover; + unsigned int i = rover, goal; + struct rtable *rth; + struct rtable __rcu **rthp; + unsigned long samples = 0; + unsigned long sum = 0, sum2 = 0; + unsigned long delta; + u64 mult; + + delta = jiffies - expires_ljiffies; + expires_ljiffies = jiffies; + mult = ((u64)delta) << rt_hash_log; + if (ip_rt_gc_timeout > 1) + do_div(mult, ip_rt_gc_timeout); + goal = (unsigned int)mult; + if (goal > rt_hash_mask) + goal = rt_hash_mask + 1; + for (; goal > 0; goal--) { + unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; + + i = (i + 1) & rt_hash_mask; + rthp = &rt_hash_table[i].chain; + + if (need_resched()) + cond_resched(); + + samples++; + + if (rcu_dereference_raw(*rthp) == NULL) + continue; + length = 0; + spin_lock_bh(rt_hash_lock_addr(i)); + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { + prefetch(rth->dst.rt_next); + if (rt_is_expired(rth)) { + *rthp = rth->dst.rt_next; + rt_free(rth); + continue; + } + if (rth->dst.expires) { + /* Entry is expired even if it is in use */ + if (time_before_eq(jiffies, rth->dst.expires)) { +nofree: + tmo >>= 1; + rthp = &rth->dst.rt_next; + /* + * We only count entries on + * a chain with equal hash inputs once + * so that entries for different QOS + * levels, and other non-hash input + * attributes don't unfairly skew + * the length computation + */ + length += has_noalias(rt_hash_table[i].chain, rth); + continue; + } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; + + /* Cleanup aged off entries. */ + *rthp = rth->dst.rt_next; + rt_free(rth); + } + spin_unlock_bh(rt_hash_lock_addr(i)); + sum += length; + sum2 += length*length; + } + if (samples) { + unsigned long avg = sum / samples; + unsigned long sd = int_sqrt(sum2 / samples - avg*avg); + rt_chain_length_max = max_t(unsigned long, + ip_rt_gc_elasticity, + (avg + 4*sd) >> FRACT_BITS); + } + rover = i; +} + +/* + * rt_worker_func() is run in process context. + * we call rt_check_expire() to scan part of the hash table + */ +static void rt_worker_func(struct work_struct *work) +{ + rt_check_expire(); + schedule_delayed_work(&expires_work, ip_rt_gc_interval); +} + /* * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -838,6 +937,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); + redirect_genid++; } /* @@ -1014,23 +1114,18 @@ static int slow_chain_length(const struct rtable *head) static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - struct neigh_table *tbl = &arp_tbl; static const __be32 inaddr_any = 0; struct net_device *dev = dst->dev; const __be32 *pkey = daddr; struct neighbour *n; -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) - if (dev->type == ARPHRD_ATM) - tbl = clip_tbl_hook; -#endif if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) pkey = &inaddr_any; - n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey); + n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey); if (n) return n; - return neigh_create(tbl, pkey, dev); + return neigh_create(&arp_tbl, pkey, dev); } static int rt_bind_neighbour(struct rtable *rt) @@ -1266,7 +1361,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) { struct rtable *rt = (struct rtable *) dst; - if (rt) { + if (rt && !(rt->dst.flags & DST_NOPEER)) { if (rt->peer == NULL) rt_bind_peer(rt, rt->rt_dst, 1); @@ -1277,7 +1372,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) iph->id = htons(inet_getid(rt->peer, more)); return; } - } else + } else if (!rt) printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", __builtin_return_address(0)); @@ -1305,11 +1400,40 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } +static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) +{ + struct rtable *rt = (struct rtable *) dst; + __be32 orig_gw = rt->rt_gateway; + struct neighbour *n, *old_n; + + dst_confirm(&rt->dst); + + rt->rt_gateway = peer->redirect_learned.a4; + + n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + if (IS_ERR(n)) { + rt->rt_gateway = orig_gw; + return; + } + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + } else { + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + } +} + /* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { + int s, i; struct in_device *in_dev = __in_dev_get_rcu(dev); + __be32 skeys[2] = { saddr, 0 }; + int ikeys[2] = { dev->ifindex, 0 }; struct inet_peer *peer; struct net *net; @@ -1332,13 +1456,45 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, goto reject_redirect; } - peer = inet_getpeer_v4(daddr, 1); - if (peer) { - peer->redirect_learned.a4 = new_gw; + for (s = 0; s < 2; s++) { + for (i = 0; i < 2; i++) { + unsigned int hash; + struct rtable __rcu **rthp; + struct rtable *rt; - inet_putpeer(peer); + hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net)); + + rthp = &rt_hash_table[hash].chain; + + while ((rt = rcu_dereference(*rthp)) != NULL) { + rthp = &rt->dst.rt_next; + + if (rt->rt_key_dst != daddr || + rt->rt_key_src != skeys[s] || + rt->rt_oif != ikeys[i] || + rt_is_input_route(rt) || + rt_is_expired(rt) || + !net_eq(dev_net(rt->dst.dev), net) || + rt->dst.error || + rt->dst.dev != dev || + rt->rt_gateway != old_gw) + continue; - atomic_inc(&__rt_peer_genid); + if (!rt->peer) + rt_bind_peer(rt, rt->rt_dst, 1); + + peer = rt->peer; + if (peer) { + if (peer->redirect_learned.a4 != new_gw || + peer->redirect_genid != redirect_genid) { + peer->redirect_learned.a4 = new_gw; + peer->redirect_genid = redirect_genid; + atomic_inc(&__rt_peer_genid); + } + check_peer_redir(&rt->dst, peer); + } + } + } } return; @@ -1568,11 +1724,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, est_mtu = mtu; peer->pmtu_learned = mtu; peer->pmtu_expires = pmtu_expires; + atomic_inc(&__rt_peer_genid); } inet_putpeer(peer); - - atomic_inc(&__rt_peer_genid); } return est_mtu ? : new_mtu; } @@ -1625,40 +1780,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) -{ - struct rtable *rt = (struct rtable *) dst; - __be32 orig_gw = rt->rt_gateway; - struct neighbour *n, *old_n; - - dst_confirm(&rt->dst); - - rt->rt_gateway = peer->redirect_learned.a4; - - n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); - if (IS_ERR(n)) - return PTR_ERR(n); - old_n = xchg(&rt->dst._neighbour, n); - if (old_n) - neigh_release(old_n); - if (!n || !(n->nud_state & NUD_VALID)) { - if (n) - neigh_event_send(n, NULL); - rt->rt_gateway = orig_gw; - return -EAGAIN; - } else { - rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); - } - return 0; -} -static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) +static void ipv4_validate_peer(struct rtable *rt) { - struct rtable *rt = (struct rtable *) dst; - - if (rt_is_expired(rt)) - return NULL; if (rt->rt_peer_genid != rt_peer_genid()) { struct inet_peer *peer; @@ -1667,17 +1791,26 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) peer = rt->peer; if (peer) { - check_peer_pmtu(dst, peer); + check_peer_pmtu(&rt->dst, peer); + if (peer->redirect_genid != redirect_genid) + peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(dst, peer)) - return NULL; - } + peer->redirect_learned.a4 != rt->rt_gateway) + check_peer_redir(&rt->dst, peer); } rt->rt_peer_genid = rt_peer_genid(); } +} + +static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) +{ + struct rtable *rt = (struct rtable *) dst; + + if (rt_is_expired(rt)) + return NULL; + ipv4_validate_peer(rt); return dst; } @@ -1782,12 +1915,17 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) return advmss; } -static unsigned int ipv4_default_mtu(const struct dst_entry *dst) +static unsigned int ipv4_mtu(const struct dst_entry *dst) { - unsigned int mtu = dst->dev->mtu; + const struct rtable *rt = (const struct rtable *) dst; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + if (mtu && rt_is_output_route(rt)) + return mtu; + + mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - const struct rtable *rt = (const struct rtable *) dst; if (rt->rt_gateway != rt->rt_dst && mtu > 576) mtu = 576; @@ -1820,6 +1958,8 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, dst_init_metrics(&rt->dst, peer->metrics, false); check_peer_pmtu(&rt->dst, peer); + if (peer->redirect_genid != redirect_genid) + peer->redirect_learned.a4 = 0; if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { rt->rt_gateway = peer->redirect_learned.a4; @@ -2325,6 +2465,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { + ipv4_validate_peer(rth); if (noref) { dst_use_noref(&rth->dst, jiffies); skb_dst_set_noref(skb, &rth->dst); @@ -2383,11 +2524,11 @@ EXPORT_SYMBOL(ip_route_input_common); static struct rtable *__mkroute_output(const struct fib_result *res, const struct flowi4 *fl4, __be32 orig_daddr, __be32 orig_saddr, - int orig_oif, struct net_device *dev_out, + int orig_oif, __u8 orig_rtos, + struct net_device *dev_out, unsigned int flags) { struct fib_info *fi = res->fi; - u32 tos = RT_FL_TOS(fl4); struct in_device *in_dev; u16 type = res->type; struct rtable *rth; @@ -2438,7 +2579,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_genid = rt_genid(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; - rth->rt_key_tos = tos; + rth->rt_key_tos = orig_rtos; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; @@ -2488,7 +2629,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) { struct net_device *dev_out = NULL; - u32 tos = RT_FL_TOS(fl4); + __u8 tos = RT_FL_TOS(fl4); unsigned int flags = 0; struct fib_result res; struct rtable *rth; @@ -2664,7 +2805,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) make_route: rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, - dev_out, flags); + tos, dev_out, flags); if (!IS_ERR(rth)) { unsigned int hash; @@ -2700,6 +2841,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { + ipv4_validate_peer(rth); dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -2723,9 +2865,11 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo return NULL; } -static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) +static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) { - return 0; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -2743,7 +2887,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, .check = ipv4_blackhole_dst_check, - .default_mtu = ipv4_blackhole_default_mtu, + .mtu = ipv4_blackhole_mtu, .default_advmss = ipv4_default_advmss, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .cow_metrics = ipv4_rt_blackhole_cow_metrics, @@ -2821,7 +2965,7 @@ static int rt_fill_info(struct net *net, struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; - long expires = 0; + unsigned long expires = 0; const struct inet_peer *peer = rt->peer; u32 id = 0, ts = 0, tsage = 0, error; @@ -2878,8 +3022,12 @@ static int rt_fill_info(struct net *net, tsage = get_seconds() - peer->tcp_ts_stamp; } expires = ACCESS_ONCE(peer->pmtu_expires); - if (expires) - expires -= jiffies; + if (expires) { + if (time_before(jiffies, expires)) + expires -= jiffies; + else + expires = 0; + } } if (rt_is_input_route(rt)) { @@ -3337,6 +3485,11 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); + INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); + expires_ljiffies = jiffies; + schedule_delayed_work(&expires_work, + net_random() % ip_rt_gc_interval + ip_rt_gc_interval); + if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3bc5c8f7c71b..51fdbb490437 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -15,6 +15,7 @@ #include <linux/random.h> #include <linux/cryptohash.h> #include <linux/kernel.h> +#include <linux/export.h> #include <net/tcp.h> #include <net/route.h> @@ -244,7 +245,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) if (!sysctl_tcp_timestamps) return false; - tcp_opt->sack_ok = (options >> 4) & 0x1; + tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0; *ecn_ok = (options >> 5) & 1; if (*ecn_ok && !sysctl_tcp_ecn) return false; @@ -265,7 +266,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_options_received tcp_opt; - u8 *hash_location; + const u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 69fd7201129a..4aa7e9dc0cbb 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/nsproxy.h> +#include <linux/swap.h> #include <net/snmp.h> #include <net/icmp.h> #include <net/ip.h> @@ -23,6 +24,7 @@ #include <net/cipso_ipv4.h> #include <net/inet_frag.h> #include <net/ping.h> +#include <net/tcp_memcontrol.h> static int zero; static int tcp_retr1_max = 255; @@ -73,7 +75,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, } -void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) +static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) { gid_t *data = table->data; unsigned seq; @@ -86,7 +88,7 @@ void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t } /* Update system visible IP port range */ -static void set_ping_group_range(struct ctl_table *table, int range[2]) +static void set_ping_group_range(struct ctl_table *table, gid_t range[2]) { gid_t *data = table->data; write_seqlock(&sysctl_local_ports.lock); @@ -174,6 +176,49 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } +static int ipv4_tcp_mem(ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + unsigned long vec[3]; + struct net *net = current->nsproxy->net_ns; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM + struct mem_cgroup *memcg; +#endif + + ctl_table tmp = { + .data = &vec, + .maxlen = sizeof(vec), + .mode = ctl->mode, + }; + + if (!write) { + ctl->data = &net->ipv4.sysctl_tcp_mem; + return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos); + } + + ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos); + if (ret) + return ret; + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM + rcu_read_lock(); + memcg = mem_cgroup_from_task(current); + + tcp_prot_mem(memcg, vec[0], 0); + tcp_prot_mem(memcg, vec[1], 1); + tcp_prot_mem(memcg, vec[2], 2); + rcu_read_unlock(); +#endif + + net->ipv4.sysctl_tcp_mem[0] = vec[0]; + net->ipv4.sysctl_tcp_mem[1] = vec[1]; + net->ipv4.sysctl_tcp_mem[2] = vec[2]; + + return 0; +} + static struct ctl_table ipv4_table[] = { { .procname = "tcp_timestamps", @@ -433,13 +478,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_mem", - .data = &sysctl_tcp_mem, - .maxlen = sizeof(sysctl_tcp_mem), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax - }, - { .procname = "tcp_wmem", .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), @@ -721,6 +759,12 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_ping_group_range, }, + { + .procname = "tcp_mem", + .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), + .mode = 0644, + .proc_handler = ipv4_tcp_mem, + }, { } }; @@ -734,6 +778,7 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); static __net_init int ipv4_sysctl_init_net(struct net *net) { struct ctl_table *table; + unsigned long limit; table = ipv4_net_table; if (!net_eq(net, &init_net)) { @@ -769,6 +814,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_rt_cache_rebuild_count = 4; + limit = nr_free_buffer_pages() / 8; + limit = max(limit, 128UL); + net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; + net->ipv4.sysctl_tcp_mem[1] = limit; + net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; + net->ipv4.ipv4_hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); if (net->ipv4.ipv4_hdr == NULL) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46febcacb729..9bcdec3ad772 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); -long sysctl_tcp_mem[3] __read_mostly; int sysctl_tcp_wmem[3] __read_mostly; int sysctl_tcp_rmem[3] __read_mostly; -EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); @@ -374,7 +372,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask; struct sock *sk = sock->sk; - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) @@ -524,11 +522,11 @@ EXPORT_SYMBOL(tcp_ioctl); static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; } -static inline int forced_push(struct tcp_sock *tp) +static inline int forced_push(const struct tcp_sock *tp) { return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } @@ -540,7 +538,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; - tcb->flags = TCPHDR_ACK; + tcb->tcp_flags = TCPHDR_ACK; tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); @@ -813,7 +811,7 @@ new_segment: goto wait_for_memory; if (can_coalesce) { - skb_shinfo(skb)->frags[i - 1].size += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); @@ -830,7 +828,7 @@ new_segment: skb_shinfo(skb)->gso_segs = 0; if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; copied += copy; poffset += copy; @@ -888,18 +886,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(tcp_sendpage); -#define TCP_PAGE(sk) (sk->sk_sndmsg_page) -#define TCP_OFF(sk) (sk->sk_sndmsg_off) - -static inline int select_size(struct sock *sk, int sg) +static inline int select_size(const struct sock *sk, bool sg) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { - if (sk_can_gso(sk)) - tmp = 0; - else { + if (sk_can_gso(sk)) { + /* Small frames wont use a full page: + * Payload will immediately follow tcp header. + */ + tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); + } else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); if (tmp >= pgbreak && @@ -917,9 +915,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags; + int iovlen, flags, err, copied; int mss_now, size_goal; - int sg, err, copied; + bool sg; long timeo; lock_sock(sk); @@ -946,7 +944,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; - sg = sk->sk_route_caps & NETIF_F_SG; + sg = !!(sk->sk_route_caps & NETIF_F_SG); while (--iovlen >= 0) { size_t seglen = iov->iov_len; @@ -1005,8 +1003,13 @@ new_segment: } else { int merge = 0; int i = skb_shinfo(skb)->nr_frags; - struct page *page = TCP_PAGE(sk); - int off = TCP_OFF(sk); + struct page *page = sk->sk_sndmsg_page; + int off; + + if (page && page_count(page) == 1) + sk->sk_sndmsg_off = 0; + + off = sk->sk_sndmsg_off; if (skb_can_coalesce(skb, i, page, off) && off != PAGE_SIZE) { @@ -1023,7 +1026,7 @@ new_segment: } else if (page) { if (off == PAGE_SIZE) { put_page(page); - TCP_PAGE(sk) = page = NULL; + sk->sk_sndmsg_page = page = NULL; off = 0; } } else @@ -1049,32 +1052,31 @@ new_segment: /* If this page was new, give it to the * socket so it does not get leaked. */ - if (!TCP_PAGE(sk)) { - TCP_PAGE(sk) = page; - TCP_OFF(sk) = 0; + if (!sk->sk_sndmsg_page) { + sk->sk_sndmsg_page = page; + sk->sk_sndmsg_off = 0; } goto do_error; } /* Update the skb. */ if (merge) { - skb_shinfo(skb)->frags[i - 1].size += - copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, page, off, copy); - if (TCP_PAGE(sk)) { + if (sk->sk_sndmsg_page) { get_page(page); } else if (off + copy < PAGE_SIZE) { get_page(page); - TCP_PAGE(sk) = page; + sk->sk_sndmsg_page = page; } } - TCP_OFF(sk) = off + copy; + sk->sk_sndmsg_off = off + copy; } if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; @@ -1194,13 +1196,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) struct tcp_sock *tp = tcp_sk(sk); int time_to_ack = 0; -#if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); -#endif if (inet_csk_ack_scheduled(sk)) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2409,7 +2409,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) return icsk->icsk_af_ops->setsockopt(sk, level, optname, @@ -2431,9 +2431,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); #endif /* Return information about state of tcp endpoint in API format. */ -void tcp_get_info(struct sock *sk, struct tcp_info *info) +void tcp_get_info(const struct sock *sk, struct tcp_info *info) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; @@ -2455,8 +2455,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; } - if (tp->ecn_flags&TCP_ECN_OK) + if (tp->ecn_flags & TCP_ECN_OK) info->tcpi_options |= TCPI_OPT_ECN; + if (tp->ecn_flags & TCP_ECN_SEEN) + info->tcpi_options |= TCPI_OPT_ECN_SEEN; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); @@ -2654,7 +2656,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; @@ -2857,26 +2860,25 @@ EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { int cpu; + for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu); - if (p) { - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - kfree(p); - } + struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); + + if (p->md5_desc.tfm) + crypto_free_hash(p->md5_desc.tfm); } free_percpu(pool); } void tcp_free_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *pool = NULL; + struct tcp_md5sig_pool __percpu *pool = NULL; spin_lock_bh(&tcp_md5sig_pool_lock); if (--tcp_md5sig_users == 0) { @@ -2889,30 +2891,24 @@ void tcp_free_md5sig_pool(void) } EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool * __percpu * +static struct tcp_md5sig_pool __percpu * __tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; - pool = alloc_percpu(struct tcp_md5sig_pool *); + pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) return NULL; for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p; struct crypto_hash *hash; - p = kzalloc(sizeof(*p), sk->sk_allocation); - if (!p) - goto out_free; - *per_cpu_ptr(pool, cpu) = p; - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); if (!hash || IS_ERR(hash)) goto out_free; - p->md5_desc.tfm = hash; + per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } return pool; out_free: @@ -2920,9 +2916,9 @@ out_free: return NULL; } -struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) { - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; int alloc = 0; retry: @@ -2941,7 +2937,7 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); @@ -2974,7 +2970,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; local_bh_disable(); @@ -2985,7 +2981,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) spin_unlock(&tcp_md5sig_pool_lock); if (p) - return *this_cpu_ptr(p); + return this_cpu_ptr(p); local_bh_enable(); return NULL; @@ -3000,23 +2996,25 @@ void tcp_put_md5sig_pool(void) EXPORT_SYMBOL(tcp_put_md5sig_pool); int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, - struct tcphdr *th) + const struct tcphdr *th) { struct scatterlist sg; + struct tcphdr hdr; int err; - __sum16 old_checksum = th->check; - th->check = 0; + /* We are not allowed to change tcphdr, make a local copy */ + memcpy(&hdr, th, sizeof(hdr)); + hdr.check = 0; + /* options aren't included in the hash */ - sg_init_one(&sg, th, sizeof(struct tcphdr)); - err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr)); - th->check = old_checksum; + sg_init_one(&sg, &hdr, sizeof(hdr)); + err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr)); return err; } EXPORT_SYMBOL(tcp_md5_hash_header); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - struct sk_buff *skb, unsigned header_len) + const struct sk_buff *skb, unsigned int header_len) { struct scatterlist sg; const struct tcphdr *tp = tcp_hdr(skb); @@ -3035,8 +3033,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; - sg_set_page(&sg, f->page, f->size, f->page_offset); - if (crypto_hash_update(desc, &sg, f->size)) + struct page *page = skb_frag_page(f); + sg_set_page(&sg, page, skb_frag_size(f), f->page_offset); + if (crypto_hash_update(desc, &sg, skb_frag_size(f))) return 1; } @@ -3048,7 +3047,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, } EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) { struct scatterlist sg; @@ -3277,14 +3276,9 @@ void __init tcp_init(void) sysctl_tcp_max_orphans = cnt / 2; sysctl_max_syn_backlog = max(128, cnt / 256); - limit = nr_free_buffer_pages() / 8; - limit = max(limit, 128UL); - sysctl_tcp_mem[0] = limit / 4 * 3; - sysctl_tcp_mem[1] = limit; - sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; - /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); + limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1]) + << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 850c737e08e2..fc6d475f488f 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -292,7 +292,7 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && left * tp->mss_cache < sk->sk_gso_max_size) return 1; - return left <= tcp_max_burst(tp); + return left <= tcp_max_tso_deferred_mss(tp); } EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 939edb3b8e4d..ed3f2ad42e0f 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -34,11 +34,23 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, tcp_get_info(sk, info); } +static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); +} + +static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct inet_diag_req_v2 *req) +{ + return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); +} + static const struct inet_diag_handler tcp_diag_handler = { - .idiag_hashinfo = &tcp_hashinfo, + .dump = tcp_diag_dump, + .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, - .idiag_type = TCPDIAG_GETSOCK, - .idiag_info_size = sizeof(struct tcp_info), + .idiag_type = IPPROTO_TCP, }; static int __init tcp_diag_init(void) @@ -54,4 +66,4 @@ static void __exit tcp_diag_exit(void) module_init(tcp_diag_init); module_exit(tcp_diag_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d73aab3fbfc0..2877c3e09587 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -206,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) tp->ecn_flags |= TCP_ECN_QUEUE_CWR; } -static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) { if (tcp_hdr(skb)->cwr) tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; @@ -217,32 +217,41 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; } -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) { - if (tp->ecn_flags & TCP_ECN_OK) { - if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + if (!(tp->ecn_flags & TCP_ECN_OK)) + return; + + switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { + case INET_ECN_NOT_ECT: /* Funny extension: if ECT is not set on a segment, - * it is surely retransmit. It is not in ECN RFC, - * but Linux follows this rule. */ - else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) + * and we already seen ECT on a previous segment, + * it is probably a retransmit. + */ + if (tp->ecn_flags & TCP_ECN_SEEN) tcp_enter_quickack_mode((struct sock *)tp); + break; + case INET_ECN_CE: + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + /* fallinto */ + default: + tp->ecn_flags |= TCP_ECN_SEEN; } } -static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) +static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) { if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) tp->ecn_flags &= ~TCP_ECN_OK; } -static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) +static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) { if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) tp->ecn_flags &= ~TCP_ECN_OK; } -static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) +static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) { if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) return 1; @@ -256,14 +265,11 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) static void tcp_fixup_sndbuf(struct sock *sk) { - int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + - sizeof(struct sk_buff); + int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); - if (sk->sk_sndbuf < 3 * sndmem) { - sk->sk_sndbuf = 3 * sndmem; - if (sk->sk_sndbuf > sysctl_tcp_wmem[2]) - sk->sk_sndbuf = sysctl_tcp_wmem[2]; - } + sndmem *= TCP_INIT_CWND; + if (sk->sk_sndbuf < sndmem) + sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -309,14 +315,14 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) return 0; } -static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) +static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); /* Check #1 */ if (tp->rcv_ssthresh < tp->window_clamp && (int)tp->rcv_ssthresh < tcp_space(sk) && - !tcp_memory_pressure) { + !sk_under_memory_pressure(sk)) { int incr; /* Check #2. Increase window, if skb with such overhead @@ -339,17 +345,24 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) static void tcp_fixup_rcvbuf(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + u32 mss = tcp_sk(sk)->advmss; + u32 icwnd = TCP_DEFAULT_INIT_RCVWND; + int rcvmem; - /* Try to select rcvbuf so that 4 mss-sized segments - * will fit to window and corresponding skbs will fit to our rcvbuf. - * (was 3; 4 is minimum to allow fast retransmit to work.) + /* Limit to 10 segments if mss <= 1460, + * or 14600/mss segments, with a minimum of two segments. */ - while (tcp_win_from_space(rcvmem) < tp->advmss) + if (mss > 1460) + icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); + + rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER); + while (tcp_win_from_space(rcvmem) < mss) rcvmem += 128; - if (sk->sk_rcvbuf < 4 * rcvmem) - sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); + + rcvmem *= icwnd; + + if (sk->sk_rcvbuf < rcvmem) + sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); } /* 4. Try to fixup all. It is made immediately after connection enters @@ -398,8 +411,8 @@ static void tcp_clamp_window(struct sock *sk) if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && - !tcp_memory_pressure && - atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { + !sk_under_memory_pressure(sk) && + sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), sysctl_tcp_rmem[2]); } @@ -416,7 +429,7 @@ static void tcp_clamp_window(struct sock *sk) */ void tcp_initialize_rcv_mss(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd / 2); @@ -531,8 +544,7 @@ void tcp_rcv_space_adjust(struct sock *sk) space /= tp->advmss; if (!space) space = 1; - rcvmem = (tp->advmss + MAX_TCP_HEADER + - 16 + sizeof(struct sk_buff)); + rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); while (tcp_win_from_space(rcvmem) < tp->advmss) rcvmem += 128; space *= rcvmem; @@ -812,7 +824,7 @@ void tcp_update_metrics(struct sock *sk) } } -__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) +__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); @@ -853,13 +865,13 @@ static void tcp_disable_fack(struct tcp_sock *tp) /* RFC3517 uses different metric in lost marker => reset on change */ if (tcp_is_fack(tp)) tp->lost_skb_hint = NULL; - tp->rx_opt.sack_ok &= ~2; + tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; } /* Take a notice that peer is sending D-SACKs */ static void tcp_dsack_seen(struct tcp_sock *tp) { - tp->rx_opt.sack_ok |= 4; + tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; } /* Initialize metrics on socket. */ @@ -1204,7 +1216,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) tp->lost_retrans_low = new_low_seq; } -static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, +static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, u32 prior_snd_una) { @@ -1298,7 +1310,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } -static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, +static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, struct tcp_sacktag_state *state, int dup_sack, int pcount) { @@ -1438,7 +1450,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tp->lost_cnt_hint -= tcp_skb_pcount(prev); } - TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; + TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); @@ -1453,13 +1465,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, /* I wish gso_size would have a bit more sane initialization than * something-or-zero which complicates things */ -static int tcp_skb_seglen(struct sk_buff *skb) +static int tcp_skb_seglen(const struct sk_buff *skb) { return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); } /* Shifting pages past head area doesn't work */ -static int skb_can_shift(struct sk_buff *skb) +static int skb_can_shift(const struct sk_buff *skb) { return !skb_headlen(skb) && skb_is_nonlinear(skb); } @@ -1708,19 +1720,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, return skb; } -static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache) +static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache) { return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); } static int -tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, +tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - unsigned char *ptr = (skb_transport_header(ack_skb) + - TCP_SKB_CB(ack_skb)->sacked); + const unsigned char *ptr = (skb_transport_header(ack_skb) + + TCP_SKB_CB(ack_skb)->sacked); struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; @@ -2284,7 +2296,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag) return 0; } -static inline int tcp_fackets_out(struct tcp_sock *tp) +static inline int tcp_fackets_out(const struct tcp_sock *tp) { return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; } @@ -2304,19 +2316,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) * they differ. Since neither occurs due to loss, TCP should really * ignore them. */ -static inline int tcp_dupack_heuristics(struct tcp_sock *tp) +static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) { return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; } -static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) +static inline int tcp_skb_timedout(const struct sock *sk, + const struct sk_buff *skb) { return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; } -static inline int tcp_head_timedout(struct sock *sk) +static inline int tcp_head_timedout(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); return tp->packets_out && tcp_skb_timedout(sk, tcp_write_queue_head(sk)); @@ -2627,7 +2640,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag) /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ -static inline int tcp_packet_delayed(struct tcp_sock *tp) +static inline int tcp_packet_delayed(const struct tcp_sock *tp) { return !tp->retrans_stamp || (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && @@ -2650,7 +2663,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", @@ -2688,7 +2701,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) tp->snd_cwnd_stamp = tcp_time_stamp; } -static inline int tcp_may_undo(struct tcp_sock *tp) +static inline int tcp_may_undo(const struct tcp_sock *tp) { return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); } @@ -2752,9 +2765,9 @@ static void tcp_try_undo_dsack(struct sock *sk) * that successive retransmissions of a segment must not advance * retrans_stamp under any conditions. */ -static int tcp_any_retrans_done(struct sock *sk) +static int tcp_any_retrans_done(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; if (tp->retrans_out) @@ -2828,9 +2841,13 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - /* Do not moderate cwnd if it's already undone in cwr or recovery */ - if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { - tp->snd_cwnd = tp->snd_ssthresh; + + /* Do not moderate cwnd if it's already undone in cwr or recovery. */ + if (tp->undo_marker) { + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + else /* PRR */ + tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2841,7 +2858,7 @@ static void tcp_try_keep_open(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int state = TCP_CA_Open; - if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) + if (tcp_left_out(tp) || tcp_any_retrans_done(sk)) state = TCP_CA_Disorder; if (inet_csk(sk)->icsk_ca_state != state) { @@ -2864,7 +2881,8 @@ static void tcp_try_to_open(struct sock *sk, int flag) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); - tcp_moderate_cwnd(tp); + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + tcp_moderate_cwnd(tp); } else { tcp_cwnd_down(sk, flag); } @@ -2948,6 +2966,38 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); +/* This function implements the PRR algorithm, specifcally the PRR-SSRB + * (proportional rate reduction with slow start reduction bound) as described in + * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. + * It computes the number of packets to send (sndcnt) based on packets newly + * delivered: + * 1) If the packets in flight is larger than ssthresh, PRR spreads the + * cwnd reductions across a full RTT. + * 2) If packets in flight is lower than ssthresh (such as due to excess + * losses and/or application stalls), do not perform any further cwnd + * reductions, but instead slow start up to ssthresh. + */ +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, + int fast_rexmit, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + int sndcnt = 0; + int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + + if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { + u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + + tp->prior_cwnd - 1; + sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; + } else { + sndcnt = min_t(int, delta, + max_t(int, tp->prr_delivered - tp->prr_out, + newly_acked_sacked) + 1); + } + + sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2959,11 +3009,12 @@ EXPORT_SYMBOL(tcp_simple_retransmit); * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, + int newly_acked_sacked, bool is_dupack, + int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); int fast_rexmit = 0, mib_idx; @@ -3016,17 +3067,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) } break; - case TCP_CA_Disorder: - tcp_try_undo_dsack(sk); - if (!tp->undo_marker || - /* For SACK case do not Open to allow to undo - * catching for all duplicate ACKs. */ - tcp_is_reno(tp) || tp->snd_una != tp->high_seq) { - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Open); - } - break; - case TCP_CA_Recovery: if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); @@ -3067,7 +3107,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tcp_add_reno_sack(sk); } - if (icsk->icsk_ca_state == TCP_CA_Disorder) + if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk)) { @@ -3109,13 +3149,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; + tp->prior_cwnd = tp->snd_cwnd; + tp->prr_delivered = 0; + tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); fast_rexmit = 1; } if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_down(sk, flag); + tp->prr_delivered += newly_acked_sacked; + tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); tcp_xmit_retransmit_queue(sk); } @@ -3192,7 +3236,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ static void tcp_rearm_rto(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -3296,7 +3340,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. */ - if (!(scb->flags & TCPHDR_SYN)) { + if (!(scb->tcp_flags & TCPHDR_SYN)) { flag |= FLAG_DATA_ACKED; } else { flag |= FLAG_SYN_ACKED; @@ -3444,7 +3488,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 * and in FreeBSD. NetBSD's one is even worse.) is wrong. */ -static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, +static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack, u32 ack_seq) { struct tcp_sock *tp = tcp_sk(sk); @@ -3620,16 +3664,20 @@ static int tcp_process_frto(struct sock *sk, int flag) } /* This routine deals with incoming acks, but not outgoing ones. */ -static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) +static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; + bool is_dupack = false; u32 prior_in_flight; u32 prior_fackets; int prior_packets; + int prior_sacked = tp->sacked_out; + int pkts_acked = 0; + int newly_acked_sacked = 0; int frto_cwnd = 0; /* If the ack is older than previous acks @@ -3701,6 +3749,10 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + pkts_acked = prior_packets - tp->packets_out; + newly_acked_sacked = (prior_packets - prior_sacked) - + (tp->packets_out - tp->sacked_out); + if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ @@ -3712,8 +3764,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); - tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - flag); + is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + is_dupack, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); @@ -3725,6 +3778,10 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) return 1; no_queue: + /* If data was DSACKed, see if we can undo a cwnd reduction. */ + if (flag & FLAG_DSACKING_ACK) + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3738,10 +3795,14 @@ invalid_ack: return -1; old_ack: + /* If data was SACKed, tag it and see if we should send more data. + * If data was DSACKed, see if we can undo a cwnd reduction. + */ if (TCP_SKB_CB(skb)->sacked) { - tcp_sacktag_write_queue(sk, skb, prior_snd_una); - if (icsk->icsk_ca_state == TCP_CA_Open) - tcp_try_keep_open(sk); + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); + newly_acked_sacked = tp->sacked_out - prior_sacked; + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + is_dupack, flag); } SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -3752,14 +3813,14 @@ old_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - u8 **hvpp, int estab) +void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, + const u8 **hvpp, int estab) { - unsigned char *ptr; - struct tcphdr *th = tcp_hdr(skb); + const unsigned char *ptr; + const struct tcphdr *th = tcp_hdr(skb); int length = (th->doff * 4) - sizeof(struct tcphdr); - ptr = (unsigned char *)(th + 1); + ptr = (const unsigned char *)(th + 1); opt_rx->saw_tstamp = 0; while (length > 0) { @@ -3817,7 +3878,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && !estab && sysctl_tcp_sack) { - opt_rx->sack_ok = 1; + opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } break; @@ -3870,9 +3931,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, } EXPORT_SYMBOL(tcp_parse_options); -static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) +static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th) { - __be32 *ptr = (__be32 *)(th + 1); + const __be32 *ptr = (const __be32 *)(th + 1); if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { @@ -3889,8 +3950,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). */ -static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp, u8 **hvpp) +static int tcp_fast_parse_options(const struct sk_buff *skb, + const struct tcphdr *th, + struct tcp_sock *tp, const u8 **hvpp) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. @@ -3911,10 +3973,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, /* * Parse MD5 Signature option */ -u8 *tcp_parse_md5sig_option(struct tcphdr *th) +const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) { - int length = (th->doff << 2) - sizeof (*th); - u8 *ptr = (u8*)(th + 1); + int length = (th->doff << 2) - sizeof(*th); + const u8 *ptr = (const u8 *)(th + 1); /* If the TCP option is too short, we can short cut */ if (length < TCPOLEN_MD5SIG) @@ -3991,8 +4053,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { - struct tcp_sock *tp = tcp_sk(sk); - struct tcphdr *th = tcp_hdr(skb); + const struct tcp_sock *tp = tcp_sk(sk); + const struct tcphdr *th = tcp_hdr(skb); u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -4031,7 +4093,7 @@ static inline int tcp_paws_discard(const struct sock *sk, * (borrowed from freebsd) */ -static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq) +static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) { return !before(end_seq, tp->rcv_wup) && !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); @@ -4076,7 +4138,7 @@ static void tcp_reset(struct sock *sk) * * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. */ -static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) +static void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -4188,7 +4250,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) tcp_sack_extend(tp->duplicate_sack, seq, end_seq); } -static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) +static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -4347,7 +4409,7 @@ static void tcp_ofo_queue(struct sock *sk) __skb_queue_tail(&sk->sk_receive_queue, skb); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (tcp_hdr(skb)->fin) - tcp_fin(skb, sk, tcp_hdr(skb)); + tcp_fin(sk); } } @@ -4375,7 +4437,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); struct tcp_sock *tp = tcp_sk(sk); int eaten = -1; @@ -4429,7 +4491,7 @@ queue_and_out: if (skb->len) tcp_event_data_recv(sk, skb); if (th->fin) - tcp_fin(skb, sk, th); + tcp_fin(sk); if (!skb_queue_empty(&tp->out_of_order_queue)) { tcp_ofo_queue(sk); @@ -4804,7 +4866,7 @@ static int tcp_prune_queue(struct sock *sk) if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) tcp_clamp_window(sk); - else if (tcp_memory_pressure) + else if (sk_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); @@ -4859,9 +4921,9 @@ void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -static int tcp_should_expand_sndbuf(struct sock *sk) +static int tcp_should_expand_sndbuf(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* If the user specified a specific send buffer setting, do * not modify it. @@ -4870,11 +4932,11 @@ static int tcp_should_expand_sndbuf(struct sock *sk) return 0; /* If we are under global TCP memory pressure, do not expand. */ - if (tcp_memory_pressure) + if (sk_under_memory_pressure(sk)) return 0; /* If we are under soft global TCP memory pressure, do not expand. */ - if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) + if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) return 0; /* If we filled the congestion window, do not expand. */ @@ -4895,8 +4957,10 @@ static void tcp_new_space(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_should_expand_sndbuf(sk)) { - int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int sndmem = SKB_TRUESIZE(max_t(u32, + tp->rx_opt.mss_clamp, + tp->mss_cache) + + MAX_TCP_HEADER); int demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); sndmem *= 2 * demanded; @@ -4968,7 +5032,7 @@ static inline void tcp_ack_snd_check(struct sock *sk) * either form (or just set the sysctl tcp_stdurg). */ -static void tcp_check_urg(struct sock *sk, struct tcphdr *th) +static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); @@ -5034,7 +5098,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th) } /* This is the 'fast' part of urgent handling. */ -static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) +static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); @@ -5155,9 +5219,9 @@ out: * play significant role here. */ static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, int syn_inerr) + const struct tcphdr *th, int syn_inerr) { - u8 *hash_location; + const u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ @@ -5238,7 +5302,7 @@ discard: * tcp_data_queue when everything is OK. */ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + const struct tcphdr *th, unsigned int len) { struct tcp_sock *tp = tcp_sk(sk); int res; @@ -5449,9 +5513,9 @@ discard: EXPORT_SYMBOL(tcp_rcv_established); static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + const struct tcphdr *th, unsigned int len) { - u8 *hash_location; + const u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; @@ -5726,7 +5790,7 @@ reset_and_undo: */ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + const struct tcphdr *th, unsigned int len) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -5747,6 +5811,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; if (th->syn) { + if (th->fin) + goto discard; if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7963e03f1068..1eb4ad57670e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -73,6 +73,7 @@ #include <net/xfrm.h> #include <net/netdma.h> #include <net/secure_seq.h> +#include <net/tcp_memcontrol.h> #include <linux/inet.h> #include <linux/ipv6.h> @@ -92,7 +93,7 @@ EXPORT_SYMBOL(sysctl_tcp_low_latency); static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr); static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, - __be32 daddr, __be32 saddr, struct tcphdr *th); + __be32 daddr, __be32 saddr, const struct tcphdr *th); #else static inline struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) @@ -104,7 +105,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) +static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, @@ -552,7 +553,7 @@ static void __tcp_v4_send_check(struct sk_buff *skb, /* This routine computes an IPv4 TCP checksum. */ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); } @@ -590,7 +591,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; #ifdef CONFIG_TCP_MD5SIG @@ -652,6 +653,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; net = dev_net(skb_dst(skb)->dev); + arg.tos = ip_hdr(skb)->tos; ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, &arg, arg.iov[0].iov_len); @@ -666,9 +668,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, int oif, struct tcp_md5sig_key *key, - int reply_flags) + int reply_flags, u8 tos) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) @@ -726,7 +728,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, arg.csumoffset = offsetof(struct tcphdr, check) / 2; if (oif) arg.bound_dev_if = oif; - + arg.tos = tos; ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, &arg, arg.iov[0].iov_len); @@ -743,7 +745,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), - tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, + tw->tw_tos ); inet_twsk_put(tw); @@ -757,7 +760,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, req->ts_recent, 0, tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), - inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, + ip_hdr(skb)->tos); } /* @@ -1090,7 +1094,7 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, } static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, - __be32 daddr, __be32 saddr, struct tcphdr *th) + __be32 daddr, __be32 saddr, const struct tcphdr *th) { struct tcp_md5sig_pool *hp; struct hash_desc *desc; @@ -1122,12 +1126,12 @@ clear_hash_noput: } int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, - struct sock *sk, struct request_sock *req, - struct sk_buff *skb) + const struct sock *sk, const struct request_sock *req, + const struct sk_buff *skb) { struct tcp_md5sig_pool *hp; struct hash_desc *desc; - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); __be32 saddr, daddr; if (sk) { @@ -1172,7 +1176,7 @@ clear_hash_noput: } EXPORT_SYMBOL(tcp_v4_md5_hash_skb); -static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) +static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) { /* * This gets called for each TCP segment that arrives @@ -1182,10 +1186,10 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) * o MD5 hash and we're not expecting one. * o MD5 hash and its wrong. */ - __u8 *hash_location = NULL; + const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); int genhash; unsigned char newhash[16]; @@ -1248,7 +1252,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); @@ -1507,6 +1511,9 @@ exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: + tcp_clear_xmit_timers(newsk); + tcp_cleanup_congestion_control(newsk); + bh_unlock_sock(newsk); sock_put(newsk); goto exit; } @@ -1588,7 +1595,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1605,7 +1612,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb->rxhash); + sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -1613,7 +1620,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } } else - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; @@ -1645,7 +1652,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv); int tcp_v4_rcv(struct sk_buff *skb) { const struct iphdr *iph; - struct tcphdr *th; + const struct tcphdr *th; struct sock *sk; int ret; struct net *net = dev_net(skb->dev); @@ -1680,7 +1687,7 @@ int tcp_v4_rcv(struct sk_buff *skb) skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = iph->tos; + TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); @@ -1809,7 +1816,7 @@ EXPORT_SYMBOL(tcp_v4_get_peer); void *tcp_v4_tw_get_peer(struct sock *sk) { - struct inet_timewait_sock *tw = inet_twsk(sk); + const struct inet_timewait_sock *tw = inet_twsk(sk); return inet_getpeer_v4(tw->tw_daddr, 1); } @@ -1911,7 +1918,8 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_rcvbuf = sysctl_tcp_rmem[1]; local_bh_disable(); - percpu_counter_inc(&tcp_sockets_allocated); + sock_update_memcg(sk); + sk_sockets_allocated_inc(sk); local_bh_enable(); return 0; @@ -1967,7 +1975,8 @@ void tcp_v4_destroy_sock(struct sock *sk) tp->cookie_values = NULL; } - percpu_counter_dec(&tcp_sockets_allocated); + sk_sockets_allocated_dec(sk); + sock_release_memcg(sk); } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -2336,7 +2345,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) } } -static int tcp_seq_open(struct inode *inode, struct file *file) +int tcp_seq_open(struct inode *inode, struct file *file) { struct tcp_seq_afinfo *afinfo = PDE(inode)->data; struct tcp_iter_state *s; @@ -2352,23 +2361,19 @@ static int tcp_seq_open(struct inode *inode, struct file *file) s->last_pos = 0; return 0; } +EXPORT_SYMBOL(tcp_seq_open); int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) { int rc = 0; struct proc_dir_entry *p; - afinfo->seq_fops.open = tcp_seq_open; - afinfo->seq_fops.read = seq_read; - afinfo->seq_fops.llseek = seq_lseek; - afinfo->seq_fops.release = seq_release_net; - afinfo->seq_ops.start = tcp_seq_start; afinfo->seq_ops.next = tcp_seq_next; afinfo->seq_ops.stop = tcp_seq_stop; p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, - &afinfo->seq_fops, afinfo); + afinfo->seq_fops, afinfo); if (!p) rc = -ENOMEM; return rc; @@ -2381,7 +2386,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) } EXPORT_SYMBOL(tcp_proc_unregister); -static void get_openreq4(struct sock *sk, struct request_sock *req, +static void get_openreq4(const struct sock *sk, const struct request_sock *req, struct seq_file *f, int i, int uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -2411,9 +2416,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) { int timer_active; unsigned long timer_expires; - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); @@ -2462,7 +2467,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) len); } -static void get_timewait4_sock(struct inet_timewait_sock *tw, +static void get_timewait4_sock(const struct inet_timewait_sock *tw, struct seq_file *f, int i, int *len) { __be32 dest, src; @@ -2517,12 +2522,18 @@ out: return 0; } +static const struct file_operations tcp_afinfo_seq_fops = { + .owner = THIS_MODULE, + .open = tcp_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net +}; + static struct tcp_seq_afinfo tcp4_seq_afinfo = { .name = "tcp", .family = AF_INET, - .seq_fops = { - .owner = THIS_MODULE, - }, + .seq_fops = &tcp_afinfo_seq_fops, .seq_ops = { .show = tcp4_seq_show, }, @@ -2612,7 +2623,6 @@ struct proto tcp_prot = { .orphan_count = &tcp_orphan_count, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, - .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, @@ -2626,10 +2636,14 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM + .init_cgroup = tcp_init_cgroup, + .destroy_cgroup = tcp_destroy_cgroup, + .proto_cgroup = tcp_proto_cgroup, +#endif }; EXPORT_SYMBOL(tcp_prot); - static int __net_init tcp_sk_init(struct net *net) { return inet_ctl_sock_create(&net->ipv4.tcp_sock, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c new file mode 100644 index 000000000000..49978788a9dc --- /dev/null +++ b/net/ipv4/tcp_memcontrol.c @@ -0,0 +1,272 @@ +#include <net/tcp.h> +#include <net/tcp_memcontrol.h> +#include <net/sock.h> +#include <net/ip.h> +#include <linux/nsproxy.h> +#include <linux/memcontrol.h> +#include <linux/module.h> + +static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft); +static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, + const char *buffer); +static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event); + +static struct cftype tcp_files[] = { + { + .name = "kmem.tcp.limit_in_bytes", + .write_string = tcp_cgroup_write, + .read_u64 = tcp_cgroup_read, + .private = RES_LIMIT, + }, + { + .name = "kmem.tcp.usage_in_bytes", + .read_u64 = tcp_cgroup_read, + .private = RES_USAGE, + }, + { + .name = "kmem.tcp.failcnt", + .private = RES_FAILCNT, + .trigger = tcp_cgroup_reset, + .read_u64 = tcp_cgroup_read, + }, + { + .name = "kmem.tcp.max_usage_in_bytes", + .private = RES_MAX_USAGE, + .trigger = tcp_cgroup_reset, + .read_u64 = tcp_cgroup_read, + }, +}; + +static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) +{ + return container_of(cg_proto, struct tcp_memcontrol, cg_proto); +} + +static void memcg_tcp_enter_memory_pressure(struct sock *sk) +{ + if (sk->sk_cgrp->memory_pressure) + *sk->sk_cgrp->memory_pressure = 1; +} +EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); + +int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) +{ + /* + * The root cgroup does not use res_counters, but rather, + * rely on the data already collected by the network + * subsystem + */ + struct res_counter *res_parent = NULL; + struct cg_proto *cg_proto, *parent_cg; + struct tcp_memcontrol *tcp; + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); + struct mem_cgroup *parent = parent_mem_cgroup(memcg); + struct net *net = current->nsproxy->net_ns; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + goto create_files; + + tcp = tcp_from_cgproto(cg_proto); + + tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0]; + tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1]; + tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2]; + tcp->tcp_memory_pressure = 0; + + parent_cg = tcp_prot.proto_cgroup(parent); + if (parent_cg) + res_parent = parent_cg->memory_allocated; + + res_counter_init(&tcp->tcp_memory_allocated, res_parent); + percpu_counter_init(&tcp->tcp_sockets_allocated, 0); + + cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure; + cg_proto->memory_pressure = &tcp->tcp_memory_pressure; + cg_proto->sysctl_mem = tcp->tcp_prot_mem; + cg_proto->memory_allocated = &tcp->tcp_memory_allocated; + cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; + cg_proto->memcg = memcg; + +create_files: + return cgroup_add_files(cgrp, ss, tcp_files, + ARRAY_SIZE(tcp_files)); +} +EXPORT_SYMBOL(tcp_init_cgroup); + +void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) +{ + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); + struct cg_proto *cg_proto; + struct tcp_memcontrol *tcp; + u64 val; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return; + + tcp = tcp_from_cgproto(cg_proto); + percpu_counter_destroy(&tcp->tcp_sockets_allocated); + + val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); + + if (val != RESOURCE_MAX) + jump_label_dec(&memcg_socket_limit_enabled); +} +EXPORT_SYMBOL(tcp_destroy_cgroup); + +static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) +{ + struct net *net = current->nsproxy->net_ns; + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + u64 old_lim; + int i; + int ret; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return -EINVAL; + + if (val > RESOURCE_MAX) + val = RESOURCE_MAX; + + tcp = tcp_from_cgproto(cg_proto); + + old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); + ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); + if (ret) + return ret; + + for (i = 0; i < 3; i++) + tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, + net->ipv4.sysctl_tcp_mem[i]); + + if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) + jump_label_dec(&memcg_socket_limit_enabled); + else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) + jump_label_inc(&memcg_socket_limit_enabled); + + return 0; +} + +static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, + const char *buffer) +{ + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + unsigned long long val; + int ret = 0; + + switch (cft->private) { + case RES_LIMIT: + /* see memcontrol.c */ + ret = res_counter_memparse_write_strategy(buffer, &val); + if (ret) + break; + ret = tcp_update_limit(memcg, val); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) +{ + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return default_val; + + tcp = tcp_from_cgproto(cg_proto); + return res_counter_read_u64(&tcp->tcp_memory_allocated, type); +} + +static u64 tcp_read_usage(struct mem_cgroup *memcg) +{ + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; + + tcp = tcp_from_cgproto(cg_proto); + return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); +} + +static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) +{ + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + u64 val; + + switch (cft->private) { + case RES_LIMIT: + val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); + break; + case RES_USAGE: + val = tcp_read_usage(memcg); + break; + case RES_FAILCNT: + case RES_MAX_USAGE: + val = tcp_read_stat(memcg, cft->private, 0); + break; + default: + BUG(); + } + return val; +} + +static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event) +{ + struct mem_cgroup *memcg; + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + memcg = mem_cgroup_from_cont(cont); + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return 0; + tcp = tcp_from_cgproto(cg_proto); + + switch (event) { + case RES_MAX_USAGE: + res_counter_reset_max(&tcp->tcp_memory_allocated); + break; + case RES_FAILCNT: + res_counter_reset_failcnt(&tcp->tcp_memory_allocated); + break; + } + + return 0; +} + +unsigned long long tcp_max_memory(const struct mem_cgroup *memcg) +{ + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg); + if (!cg_proto) + return 0; + + tcp = tcp_from_cgproto(cg_proto); + return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); +} + +void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx) +{ + struct tcp_memcontrol *tcp; + struct cg_proto *cg_proto; + + cg_proto = tcp_prot.proto_cgroup(memcg); + if (!cg_proto) + return; + + tcp = tcp_from_cgproto(cg_proto); + + tcp->tcp_prot_mem[idx] = val; +} diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0ce3d06dce60..550e755747e0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -141,7 +141,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; @@ -336,15 +336,16 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet6_timewait_sock *tw6; tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); tw6 = inet6_twsk((struct sock *)tw); - ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); + tw6->tw_v6_daddr = np->daddr; + tw6->tw_v6_rcv_saddr = np->rcv_saddr; + tw->tw_tclass = np->tclass; tw->tw_ipv6only = np->ipv6only; } #endif @@ -424,7 +425,7 @@ static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, */ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) { - struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); + struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); if (newsk != NULL) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -494,7 +495,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->frto_counter = 0; newtp->frto_highmark = 0; - newicsk->icsk_ca_ops = &tcp_init_congestion_ops; + if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && + !try_module_get(newicsk->icsk_ca_ops->owner)) + newicsk->icsk_ca_ops = &tcp_init_congestion_ops; tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); @@ -567,7 +570,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock **prev) { struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 882e0b0964d0..8c8de2780c7a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); /* Account for new data that has been sent to the network. */ -static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) +static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; @@ -89,9 +89,9 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) * Anything in between SND.UNA...SND.UNA+SND.WND also can be already * invalid. OK, let's make this for now: */ -static inline __u32 tcp_acceptable_seq(struct sock *sk) +static inline __u32 tcp_acceptable_seq(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); if (!before(tcp_wnd_end(tp), tp->snd_nxt)) return tp->snd_nxt; @@ -116,7 +116,7 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk) static __u16 tcp_advertise_mss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); + const struct dst_entry *dst = __sk_dst_get(sk); int mss = tp->advmss; if (dst) { @@ -133,7 +133,7 @@ static __u16 tcp_advertise_mss(struct sock *sk) /* RFC2861. Reset CWND after idle period longer RTO to "restart window". * This is the first part of cwnd validation mechanism. */ -static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) +static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst) { struct tcp_sock *tp = tcp_sk(sk); s32 delta = tcp_time_stamp - tp->lsndtime; @@ -154,7 +154,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) /* Congestion state accounting after a packet has been sent. */ static void tcp_event_data_sent(struct tcp_sock *tp, - struct sk_buff *skb, struct sock *sk) + struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; @@ -295,11 +295,11 @@ static u16 tcp_select_window(struct sock *sk) } /* Packet ECN state for a SYN-ACK */ -static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; } /* Packet ECN state for a SYN. */ @@ -309,13 +309,13 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) tp->ecn_flags = 0; if (sysctl_tcp_ecn == 1) { - TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; } } static __inline__ void -TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) +TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th) { if (inet_rsk(req)->ecn_ok) th->ece = 1; @@ -356,7 +356,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - TCP_SKB_CB(skb)->flags = flags; + TCP_SKB_CB(skb)->tcp_flags = flags; TCP_SKB_CB(skb)->sacked = 0; skb_shinfo(skb)->gso_segs = 1; @@ -565,7 +565,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, */ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { + struct tcp_md5sig_key **md5) +{ struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; unsigned remaining = MAX_TCP_OPTION_SPACE; @@ -743,7 +744,8 @@ static unsigned tcp_synack_options(struct sock *sk, */ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { + struct tcp_md5sig_key **md5) +{ struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; struct tcp_sock *tp = tcp_sk(sk); unsigned size = 0; @@ -826,7 +828,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); - if (unlikely(tcb->flags & TCPHDR_SYN)) + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); else tcp_options_size = tcp_established_options(sk, skb, &opts, @@ -850,9 +852,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->seq = htonl(tcb->seq); th->ack_seq = htonl(tp->rcv_nxt); *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | - tcb->flags); + tcb->tcp_flags); - if (unlikely(tcb->flags & TCPHDR_SYN)) { + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { /* RFC1323: The window in SYN & SYN/ACK segments * is never scaled. */ @@ -875,7 +877,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, } tcp_options_write((__be32 *)(th + 1), tp, &opts); - if (likely((tcb->flags & TCPHDR_SYN) == 0)) + if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0)) TCP_ECN_send(sk, skb, tcp_header_size); #ifdef CONFIG_TCP_MD5SIG @@ -889,11 +891,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, icsk->icsk_af_ops->send_check(sk, skb); - if (likely(tcb->flags & TCPHDR_ACK)) + if (likely(tcb->tcp_flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); if (skb->len != tcp_header_size) - tcp_event_data_sent(tp, skb, sk); + tcp_event_data_sent(tp, sk); if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, @@ -926,7 +928,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) } /* Initialize TSO segments for a packet. */ -static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, +static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { if (skb->len <= mss_now || !sk_can_gso(sk) || @@ -947,7 +949,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, /* When a modification to fackets out becomes necessary, we need to check * skb is counted to fackets_out or not. */ -static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, +static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb, int decr) { struct tcp_sock *tp = tcp_sk(sk); @@ -962,7 +964,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, /* Pcount in the middle of the write queue got changed, we need to do various * tweaks to fix counters */ -static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) +static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) { struct tcp_sock *tp = tcp_sk(sk); @@ -1032,9 +1034,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; /* PSH and FIN should only be set in the second packet. */ - flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); - TCP_SKB_CB(buff)->flags = flags; + flags = TCP_SKB_CB(skb)->tcp_flags; + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); + TCP_SKB_CB(buff)->tcp_flags = flags; TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { @@ -1091,17 +1093,26 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) { int i, k, eat; + eat = min_t(int, len, skb_headlen(skb)); + if (eat) { + __skb_pull(skb, eat); + len -= eat; + if (!len) + return; + } eat = len; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); - eat -= skb_shinfo(skb)->frags[i].size; + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (size <= eat) { + skb_frag_unref(skb, i); + eat -= size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; + skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); eat = 0; } k++; @@ -1120,11 +1131,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return -ENOMEM; - /* If len == headlen, we avoid __skb_pull to preserve alignment. */ - if (unlikely(len < skb_headlen(skb))) - __skb_pull(skb, len); - else - __pskb_trim_head(skb, len - skb_headlen(skb)); + __pskb_trim_head(skb, len); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_PARTIAL; @@ -1144,10 +1151,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) } /* Calculate MSS. Not accounting for SACKs here. */ -int tcp_mtu_to_mss(struct sock *sk, int pmtu) +int tcp_mtu_to_mss(const struct sock *sk, int pmtu) { - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); int mss_now; /* Calculate base mss without TCP options: @@ -1173,10 +1180,10 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) } /* Inverse of above */ -int tcp_mss_to_mtu(struct sock *sk, int mss) +int tcp_mss_to_mtu(const struct sock *sk, int mss) { - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); int mtu; mtu = mss + @@ -1250,8 +1257,8 @@ EXPORT_SYMBOL(tcp_sync_mss); */ unsigned int tcp_current_mss(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct dst_entry *dst = __sk_dst_get(sk); u32 mss_now; unsigned header_len; struct tcp_out_options opts; @@ -1311,10 +1318,10 @@ static void tcp_cwnd_validate(struct sock *sk) * modulo only when the receiver window alone is the limiting factor or * when we would be allowed to send the split-due-to-Nagle skb fully. */ -static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, +static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); u32 needed, window, cwnd_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; @@ -1334,13 +1341,14 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, /* Can at least one segment of SKB be sent right now, according to the * congestion window rules? If so, return how many segments are allowed. */ -static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, - struct sk_buff *skb) +static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, + const struct sk_buff *skb) { u32 in_flight, cwnd; /* Don't be strict about the congestion window for the final FIN. */ - if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1) + if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && + tcp_skb_pcount(skb) == 1) return 1; in_flight = tcp_packets_in_flight(tp); @@ -1355,7 +1363,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, * This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, +static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); @@ -1377,7 +1385,7 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp) /* Return 0, if packet can be sent now without violation Nagle's rules: * 1. It is full sized. * 2. Or it contains FIN. (already checked by caller) - * 3. Or TCP_NODELAY was set. + * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. * 4. Or TCP_CORK is not set, and all sent packets are ACKed. * With Minshall's modification: all sent small packets are ACKed. */ @@ -1393,7 +1401,7 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp, /* Return non-zero if the Nagle test allows this packet to be * sent now. */ -static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, +static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss, int nonagle) { /* Nagle rule does not apply to frames, which sit in the middle of the @@ -1409,7 +1417,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, * Nagle can be ignored during F-RTO too (see RFC4138). */ if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)) + (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return 1; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) @@ -1419,7 +1427,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, } /* Does at least the first segment of SKB fit into the send window? */ -static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, +static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -1434,10 +1442,10 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, * should be put on the wire right now. If so, it returns the number of * packets allowed by the congestion window. */ -static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, +static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb, unsigned int cur_mss, int nonagle) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); unsigned int cwnd_quota; tcp_init_tso_segs(sk, skb, cur_mss); @@ -1455,7 +1463,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, /* Test if sending is allowed right now. */ int tcp_may_send_now(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); return skb && @@ -1497,9 +1505,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; /* PSH and FIN should only be set in the second packet. */ - flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); - TCP_SKB_CB(buff)->flags = flags; + flags = TCP_SKB_CB(skb)->tcp_flags; + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); + TCP_SKB_CB(buff)->tcp_flags = flags; /* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; @@ -1530,7 +1538,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) u32 send_win, cong_win, limit, in_flight; int win_divisor; - if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto send_now; if (icsk->icsk_ca_state != TCP_CA_Open) @@ -1576,7 +1584,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) * frame, so if we have space for more than 3 frames * then send now. */ - if (limit > tcp_max_burst(tp) * tp->mss_cache) + if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache) goto send_now; } @@ -1657,7 +1665,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; - TCP_SKB_CB(nskb)->flags = TCPHDR_ACK; + TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; nskb->ip_summed = skb->ip_summed; @@ -1677,11 +1685,11 @@ static int tcp_mtu_probe(struct sock *sk) if (skb->len <= copy) { /* We've eaten all the data from this skb. * Throw it away. */ - TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; + TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); } else { - TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & + TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags & ~(TCPHDR_FIN|TCPHDR_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); @@ -1796,11 +1804,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, tcp_event_new_data_sent(sk, skb); tcp_minshall_update(tp, mss_now, skb); - sent_pkts++; + sent_pkts += tcp_skb_pcount(skb); if (push_one) break; } + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += sent_pkts; if (likely(sent_pkts)) { tcp_cwnd_validate(sk); @@ -1912,7 +1922,7 @@ u32 __tcp_select_window(struct sock *sk) if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; - if (tcp_memory_pressure) + if (sk_under_memory_pressure(sk)) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); @@ -1985,7 +1995,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; /* Merge over control information. This moves PSH/FIN etc. over */ - TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags; + TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags; /* All done, get rid of second SKB and account for it so * packet counting does not break. @@ -2003,7 +2013,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) } /* Check if coalescing SKBs is legal. */ -static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) +static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) { if (tcp_skb_pcount(skb) > 1) return 0; @@ -2033,7 +2043,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (!sysctl_tcp_retrans_collapse) return; - if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; tcp_for_write_queue_from_safe(skb, tmp, sk) { @@ -2125,12 +2135,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * since it is cheap to do so and saves bytes on the network. */ if (skb->len > 0 && - (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && + (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { if (!pskb_trim(skb, 0)) { /* Reuse, even though it does some unnecessary work */ tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1, - TCP_SKB_CB(skb)->flags); + TCP_SKB_CB(skb)->tcp_flags); skb->ip_summed = CHECKSUM_NONE; } } @@ -2140,7 +2150,15 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); + /* make sure skb->data is aligned on arches that require it */ + if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { + struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, + GFP_ATOMIC); + err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : + -ENOBUFS; + } else { + err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); + } if (err == 0) { /* Update global TCP statistics. */ @@ -2179,7 +2197,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) static int tcp_can_forward_retransmit(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Forward retransmissions are possible only during Recovery. */ if (icsk->icsk_ca_state != TCP_CA_Recovery) @@ -2294,6 +2312,9 @@ begin_fwd: return; NET_INC_STATS_BH(sock_net(sk), mib_idx); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += tcp_skb_pcount(skb); + if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, @@ -2317,7 +2338,7 @@ void tcp_send_fin(struct sock *sk) mss_now = tcp_current_mss(sk); if (tcp_send_head(sk) != NULL) { - TCP_SKB_CB(skb)->flags |= TCPHDR_FIN; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; } else { @@ -2379,11 +2400,11 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *skb; skb = tcp_write_queue_head(sk); - if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) { + if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); return -EFAULT; } - if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) { + if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); if (nskb == NULL) @@ -2397,7 +2418,7 @@ int tcp_send_synack(struct sock *sk) skb = nskb; } - TCP_SKB_CB(skb)->flags |= TCPHDR_ACK; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; TCP_ECN_send_synack(tcp_sk(sk), skb); } TCP_SKB_CB(skb)->when = tcp_time_stamp; @@ -2542,7 +2563,7 @@ EXPORT_SYMBOL(tcp_make_synack); /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { - struct dst_entry *dst = __sk_dst_get(sk); + const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); __u8 rcv_wscale; @@ -2794,13 +2815,13 @@ int tcp_write_wakeup(struct sock *sk) if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || skb->len > mss) { seg_size = min(seg_size, mss); - TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; if (tcp_fragment(sk, skb, seg_size, mss)) return -1; } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(sk, skb, mss); - TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); if (!err) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ecd44b0c45f1..a516d1e399df 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -171,13 +171,13 @@ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int retry_until; - bool do_reset, syn_set = 0; + bool do_reset, syn_set = false; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; - syn_set = 1; + syn_set = true; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { /* Black hole detection */ @@ -261,7 +261,7 @@ static void tcp_delack_timer(unsigned long data) } out: - if (tcp_memory_pressure) + if (sk_under_memory_pressure(sk)) sk_mem_reclaim(sk); out_unlock: bh_unlock_sock(sk); @@ -334,14 +334,13 @@ void tcp_retransmit_timer(struct sock *sk) * connection. If the socket is an orphan, time it out, * we cannot allow such beasts to hang infinitely. */ -#ifdef TCP_DEBUG struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", &inet->inet_daddr, ntohs(inet->inet_dport), inet->inet_num, tp->snd_una, tp->snd_nxt); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", @@ -349,7 +348,6 @@ void tcp_retransmit_timer(struct sock *sk) inet->inet_num, tp->snd_una, tp->snd_nxt); } #endif -#endif if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { tcp_write_err(sk); goto out; diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index ac3b3ee4b07c..01775983b997 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -105,7 +105,7 @@ drop: return 0; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int tunnel64_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler; @@ -134,7 +134,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info) break; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static void tunnel64_err(struct sk_buff *skb, u32 info) { struct xfrm_tunnel *handler; @@ -152,7 +152,7 @@ static const struct net_protocol tunnel4_protocol = { .netns_ok = 1, }; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static const struct net_protocol tunnel64_protocol = { .handler = tunnel64_rcv, .err_handler = tunnel64_err, @@ -167,7 +167,7 @@ static int __init tunnel4_init(void) printk(KERN_ERR "tunnel4 init: can't add protocol\n"); return -EAGAIN; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) { printk(KERN_ERR "tunnel64 init: can't add protocol\n"); inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP); @@ -179,7 +179,7 @@ static int __init tunnel4_init(void) static void __exit tunnel4_fini(void) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6)) printk(KERN_ERR "tunnel64 close: can't remove protocol\n"); #endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b5a19340a95..5d075b5f70fc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -445,7 +445,7 @@ exact_match: /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, +struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, struct udp_table *udptable) { @@ -512,6 +512,7 @@ begin: rcu_read_unlock(); return result; } +EXPORT_SYMBOL_GPL(__udp4_lib_lookup); static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, @@ -1164,7 +1165,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; - unsigned int ulen; + unsigned int ulen, copied; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -1186,9 +1187,10 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - if (len > ulen) - len = ulen; - else if (len < ulen) + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; /* @@ -1197,14 +1199,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, len); + msg->msg_iov, copied); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), @@ -1233,7 +1235,7 @@ try_again: if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); - err = len; + err = copied; if (flags & MSG_TRUNC) err = ulen; @@ -1267,7 +1269,7 @@ int udp_disconnect(struct sock *sk, int flags) sk->sk_state = TCP_CLOSE; inet->inet_daddr = 0; inet->inet_dport = 0; - sock_rps_save_rxhash(sk, 0); + sock_rps_reset_rxhash(sk); sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -1355,9 +1357,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int rc; if (inet_sk(sk)->inet_daddr) - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); - rc = ip_queue_rcv_skb(sk, skb); + rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1397,6 +1399,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) nf_reset(skb); if (up->encap_type) { + int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + /* * This is an encapsulation socket so pass the skb to * the socket's udp_encap_rcv() hook. Otherwise, just @@ -1409,11 +1413,11 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ /* if we're overly short, let UDP handle it */ - if (skb->len > sizeof(struct udphdr) && - up->encap_rcv != NULL) { + encap_rcv = ACCESS_ONCE(up->encap_rcv); + if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { int ret; - ret = (*up->encap_rcv)(sk, skb); + ret = encap_rcv(sk, skb); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INDATAGRAMS, @@ -1461,10 +1465,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_dereference_raw(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) - goto drop; - } + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) + goto drop; if (sk_rcvqueues_full(sk, skb)) @@ -1472,6 +1475,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = 0; + ipv4_pktinfo_prepare(skb); bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); @@ -2038,7 +2042,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v) spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); } -static int udp_seq_open(struct inode *inode, struct file *file) +int udp_seq_open(struct inode *inode, struct file *file) { struct udp_seq_afinfo *afinfo = PDE(inode)->data; struct udp_iter_state *s; @@ -2054,6 +2058,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) s->udp_table = afinfo->udp_table; return err; } +EXPORT_SYMBOL(udp_seq_open); /* ------------------------------------------------------------------------ */ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) @@ -2061,17 +2066,12 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) struct proc_dir_entry *p; int rc = 0; - afinfo->seq_fops.open = udp_seq_open; - afinfo->seq_fops.read = seq_read; - afinfo->seq_fops.llseek = seq_lseek; - afinfo->seq_fops.release = seq_release_net; - afinfo->seq_ops.start = udp_seq_start; afinfo->seq_ops.next = udp_seq_next; afinfo->seq_ops.stop = udp_seq_stop; p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, - &afinfo->seq_fops, afinfo); + afinfo->seq_fops, afinfo); if (!p) rc = -ENOMEM; return rc; @@ -2121,14 +2121,20 @@ int udp4_seq_show(struct seq_file *seq, void *v) return 0; } +static const struct file_operations udp_afinfo_seq_fops = { + .owner = THIS_MODULE, + .open = udp_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net +}; + /* ------------------------------------------------------------------------ */ static struct udp_seq_afinfo udp4_seq_afinfo = { .name = "udp", .family = AF_INET, .udp_table = &udp_table, - .seq_fops = { - .owner = THIS_MODULE, - }, + .seq_fops = &udp_afinfo_seq_fops, .seq_ops = { .show = udp4_seq_show, }, @@ -2243,7 +2249,8 @@ int udp4_ufo_send_check(struct sk_buff *skb) return 0; } -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c new file mode 100644 index 000000000000..e5e18cb8a586 --- /dev/null +++ b/net/ipv4/udp_diag.c @@ -0,0 +1,201 @@ +/* + * udp_diag.c Module for monitoring UDP transport protocols sockets. + * + * Authors: Pavel Emelyanov, <xemul@parallels.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include <linux/module.h> +#include <linux/inet_diag.h> +#include <linux/udp.h> +#include <net/udp.h> +#include <net/udplite.h> +#include <linux/inet_diag.h> +#include <linux/sock_diag.h> + +static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, struct inet_diag_req_v2 *req, + struct nlattr *bc) +{ + if (!inet_diag_bc_sk(bc, sk)) + return 0; + + return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); +} + +static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, + const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req) +{ + int err = -EINVAL; + struct sock *sk; + struct sk_buff *rep; + + if (req->sdiag_family == AF_INET) + sk = __udp4_lib_lookup(&init_net, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_if, tbl); +#if IS_ENABLED(CONFIG_IPV6) + else if (req->sdiag_family == AF_INET6) + sk = __udp6_lib_lookup(&init_net, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + req->id.idiag_if, tbl); +#endif + else + goto out_nosk; + + err = -ENOENT; + if (sk == NULL) + goto out_nosk; + + err = sock_diag_check_cookie(sk, req->id.idiag_cookie); + if (err) + goto out; + + err = -ENOMEM; + rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + + 64)), GFP_KERNEL); + if (!rep) + goto out; + + err = inet_sk_diag_fill(sk, NULL, rep, req, + NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, 0, nlh); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(rep); + goto out; + } + err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + if (sk) + sock_put(sk); +out_nosk: + return err; +} + +static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + int num, s_num, slot, s_slot; + + s_slot = cb->args[0]; + num = s_num = cb->args[1]; + + for (slot = s_slot; slot <= table->mask; num = s_num = 0, slot++) { + struct sock *sk; + struct hlist_nulls_node *node; + struct udp_hslot *hslot = &table->hash[slot]; + + if (hlist_nulls_empty(&hslot->head)) + continue; + + spin_lock_bh(&hslot->lock); + sk_nulls_for_each(sk, node, &hslot->head) { + struct inet_sock *inet = inet_sk(sk); + + if (num < s_num) + goto next; + if (!(r->idiag_states & (1 << sk->sk_state))) + goto next; + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next; + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next; + if (r->id.idiag_dport != inet->inet_dport && + r->id.idiag_dport) + goto next; + + if (sk_diag_dump(sk, skb, cb, r, bc) < 0) { + spin_unlock_bh(&hslot->lock); + goto done; + } +next: + num++; + } + spin_unlock_bh(&hslot->lock); + } +done: + cb->args[0] = slot; + cb->args[1] = num; +} + +static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + udp_dump(&udp_table, skb, cb, r, bc); +} + +static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct inet_diag_req_v2 *req) +{ + return udp_dump_one(&udp_table, in_skb, nlh, req); +} + +static const struct inet_diag_handler udp_diag_handler = { + .dump = udp_diag_dump, + .dump_one = udp_diag_dump_one, + .idiag_type = IPPROTO_UDP, +}; + +static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + udp_dump(&udplite_table, skb, cb, r, bc); +} + +static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct inet_diag_req_v2 *req) +{ + return udp_dump_one(&udplite_table, in_skb, nlh, req); +} + +static const struct inet_diag_handler udplite_diag_handler = { + .dump = udplite_diag_dump, + .dump_one = udplite_diag_dump_one, + .idiag_type = IPPROTO_UDPLITE, +}; + +static int __init udp_diag_init(void) +{ + int err; + + err = inet_diag_register(&udp_diag_handler); + if (err) + goto out; + err = inet_diag_register(&udplite_diag_handler); + if (err) + goto out_lite; +out: + return err; +out_lite: + inet_diag_unregister(&udp_diag_handler); + goto out; +} + +static void __exit udp_diag_exit(void) +{ + inet_diag_unregister(&udplite_diag_handler); + inet_diag_unregister(&udp_diag_handler); +} + +module_init(udp_diag_init); +module_exit(udp_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index aee9963f7f5a..12e9499a1a6c 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -10,6 +10,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/export.h> #include "udp_impl.h" struct udp_table udplite_table __read_mostly; @@ -71,13 +72,20 @@ static struct inet_protosw udplite4_protosw = { }; #ifdef CONFIG_PROC_FS + +static const struct file_operations udplite_afinfo_seq_fops = { + .owner = THIS_MODULE, + .open = udp_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net +}; + static struct udp_seq_afinfo udplite4_seq_afinfo = { .name = "udplite", .family = AF_INET, .udp_table = &udplite_table, - .seq_fops = { - .owner = THIS_MODULE, - }, + .seq_fops = &udplite_afinfo_seq_fops, .seq_ops = { .show = udp4_seq_show, }, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fc5368ad2b0d..a0b4c5da8d43 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -79,13 +79,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rtable *rt = (struct rtable *)xdst->route; const struct flowi4 *fl4 = &fl->u.ip4; - rt->rt_key_dst = fl4->daddr; - rt->rt_key_src = fl4->saddr; - rt->rt_key_tos = fl4->flowi4_tos; - rt->rt_route_iif = fl4->flowi4_iif; - rt->rt_iif = fl4->flowi4_iif; - rt->rt_oif = fl4->flowi4_oif; - rt->rt_mark = fl4->flowi4_mark; + xdst->u.rt.rt_key_dst = fl4->daddr; + xdst->u.rt.rt_key_src = fl4->saddr; + xdst->u.rt.rt_key_tos = fl4->flowi4_tos; + xdst->u.rt.rt_route_iif = fl4->flowi4_iif; + xdst->u.rt.rt_iif = fl4->flowi4_iif; + xdst->u.rt.rt_oif = fl4->flowi4_oif; + xdst->u.rt.rt_mark = fl4->flowi4_mark; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index d9ac0a0058b5..9258e751baba 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -12,6 +12,7 @@ #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/netfilter_ipv4.h> +#include <linux/export.h> static int xfrm4_init_flags(struct xfrm_state *x) { diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 82806455e859..9247d9d70e9d 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -64,7 +64,7 @@ static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .priority = 2, }; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, @@ -84,7 +84,7 @@ static int __init ipip_init(void) xfrm_unregister_type(&ipip_type, AF_INET); return -EAGAIN; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) { printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n"); xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET); @@ -97,7 +97,7 @@ static int __init ipip_init(void) static void __exit ipip_fini(void) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6)) printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n"); #endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 12368c586068..a225d5ee3c2f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -87,6 +87,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/export.h> /* Set to 3 to get tracing... */ #define ACONF_DEBUG 2 @@ -629,13 +630,13 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, goto out; } - rt = addrconf_dst_alloc(idev, addr, 0); + rt = addrconf_dst_alloc(idev, addr, false); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto out; } - ipv6_addr_copy(&ifa->addr, addr); + ifa->addr = *addr; spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); @@ -649,16 +650,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, ifa->rt = rt; - /* - * part one of RFC 4429, section 3.3 - * We should not configure an address as - * optimistic if we do not yet know the link - * layer address of our nexhop router - */ - - if (dst_get_neighbour_raw(&rt->dst) == NULL) - ifa->flags &= ~IFA_F_OPTIMISTIC; - ifa->idev = idev; in6_dev_hold(idev); /* For caller */ @@ -806,7 +797,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ip6_del_rt(rt); rt = NULL; } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { - rt->rt6i_expires = expires; + rt->dst.expires = expires; rt->rt6i_flags |= RTF_EXPIRES; } } @@ -824,12 +815,13 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i { struct inet6_dev *idev = ifp->idev; struct in6_addr addr, *tmpaddr; - unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age; + unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age; unsigned long regen_advance; int tmp_plen; int ret = 0; int max_addresses; u32 addr_flags; + unsigned long now = jiffies; write_lock(&idev->lock); if (ift) { @@ -874,7 +866,7 @@ retry: goto out; } memcpy(&addr.s6_addr[8], idev->rndid, 8); - age = (jiffies - ifp->tstamp) / HZ; + age = (now - ifp->tstamp) / HZ; tmp_valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); @@ -884,7 +876,6 @@ retry: idev->cnf.max_desync_factor); tmp_plen = ifp->prefix_len; max_addresses = idev->cnf.max_addresses; - tmp_cstamp = ifp->cstamp; tmp_tstamp = ifp->tstamp; spin_unlock_bh(&ifp->lock); @@ -929,7 +920,7 @@ retry: ift->ifpub = ifp; ift->valid_lft = tmp_valid_lft; ift->prefered_lft = tmp_prefered_lft; - ift->cstamp = tmp_cstamp; + ift->cstamp = now; ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); @@ -1227,7 +1218,7 @@ try_nextdev: if (!hiscore->ifa) return -EADDRNOTAVAIL; - ipv6_addr_copy(saddr, &hiscore->ifa->addr); + *saddr = hiscore->ifa->addr; in6_ifa_put(hiscore->ifa); return 0; } @@ -1248,7 +1239,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, list_for_each_entry(ifp, &idev->addr_list, if_list) { if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { - ipv6_addr_copy(addr, &ifp->addr); + *addr = ifp->addr; err = 0; break; } @@ -1699,7 +1690,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, .fc_protocol = RTPROT_KERNEL, }; - ipv6_addr_copy(&cfg.fc_dst, pfx); + cfg.fc_dst = *pfx; /* Prevent useless cloning on PtP SIT. This thing is done here expecting that the whole @@ -1713,6 +1704,40 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, ip6_route_add(&cfg); } + +static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, + int plen, + const struct net_device *dev, + u32 flags, u32 noflags) +{ + struct fib6_node *fn; + struct rt6_info *rt = NULL; + struct fib6_table *table; + + table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX); + if (table == NULL) + return NULL; + + write_lock_bh(&table->tb6_lock); + fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); + if (!fn) + goto out; + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + if (rt->dst.dev->ifindex != dev->ifindex) + continue; + if ((rt->rt6i_flags & flags) != flags) + continue; + if ((noflags != 0) && ((rt->rt6i_flags & flags) != 0)) + continue; + dst_hold(&rt->dst); + break; + } +out: + write_unlock_bh(&table->tb6_lock); + return rt; +} + + /* Create "default" multicast route to the interface */ static void addrconf_add_mroute(struct net_device *dev) @@ -1770,14 +1795,15 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) return ERR_PTR(-EACCES); /* Add default multicast route */ - addrconf_add_mroute(dev); + if (!(dev->flags & IFF_LOOPBACK)) + addrconf_add_mroute(dev); /* Add link local route */ addrconf_add_lroute(dev); return idev; } -void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) +void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; __u32 valid_lft; @@ -1842,21 +1868,24 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (addrconf_finite_timeout(rt_expires)) rt_expires *= HZ; - rt = rt6_lookup(net, &pinfo->prefix, NULL, - dev->ifindex, 1); + rt = addrconf_get_prefix_route(&pinfo->prefix, + pinfo->prefix_len, + dev, + RTF_ADDRCONF | RTF_PREFIX_RT, + RTF_GATEWAY | RTF_DEFAULT); - if (rt && addrconf_is_prefix_route(rt)) { + if (rt) { /* Autoconf prefix route */ if (valid_lft == 0) { ip6_del_rt(rt); rt = NULL; } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ - rt->rt6i_expires = jiffies + rt_expires; + rt->dst.expires = jiffies + rt_expires; rt->rt6i_flags |= RTF_EXPIRES; } else { rt->rt6i_flags &= ~RTF_EXPIRES; - rt->rt6i_expires = 0; + rt->dst.expires = 0; } } else if (valid_lft) { clock_t expires = 0; @@ -1905,7 +1934,7 @@ ok: #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && - !net->ipv6.devconf_all->forwarding) + !net->ipv6.devconf_all->forwarding && sllao) addr_flags = IFA_F_OPTIMISTIC; #endif @@ -1999,25 +2028,50 @@ ok: #ifdef CONFIG_IPV6_PRIVACY read_lock_bh(&in6_dev->lock); /* update all temporary addresses in the list */ - list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) { - /* - * When adjusting the lifetimes of an existing - * temporary address, only lower the lifetimes. - * Implementations must not increase the - * lifetimes of an existing temporary address - * when processing a Prefix Information Option. - */ + list_for_each_entry(ift, &in6_dev->tempaddr_list, + tmp_list) { + int age, max_valid, max_prefered; + if (ifp != ift->ifpub) continue; + /* + * RFC 4941 section 3.3: + * If a received option will extend the lifetime + * of a public address, the lifetimes of + * temporary addresses should be extended, + * subject to the overall constraint that no + * temporary addresses should ever remain + * "valid" or "preferred" for a time longer than + * (TEMP_VALID_LIFETIME) or + * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), + * respectively. + */ + age = (now - ift->cstamp) / HZ; + max_valid = in6_dev->cnf.temp_valid_lft - age; + if (max_valid < 0) + max_valid = 0; + + max_prefered = in6_dev->cnf.temp_prefered_lft - + in6_dev->cnf.max_desync_factor - + age; + if (max_prefered < 0) + max_prefered = 0; + + if (valid_lft > max_valid) + valid_lft = max_valid; + + if (prefered_lft > max_prefered) + prefered_lft = max_prefered; + spin_lock(&ift->lock); flags = ift->flags; - if (ift->valid_lft > valid_lft && - ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ) - ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ; - if (ift->prefered_lft > prefered_lft && - ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ) - ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ; + ift->valid_lft = valid_lft; + ift->prefered_lft = prefered_lft; + ift->tstamp = now; + if (prefered_lft > 0) + ift->flags &= ~IFA_F_DEPRECATED; + spin_unlock(&ift->lock); if (!(flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ift); @@ -2025,9 +2079,11 @@ ok: if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) { /* - * When a new public address is created as described in [ADDRCONF], - * also create a new temporary address. Also create a temporary - * address if it's enabled but no temporary address currently exists. + * When a new public address is created as + * described in [ADDRCONF], also create a new + * temporary address. Also create a temporary + * address if it's enabled but no temporary + * address currently exists. */ read_unlock_bh(&in6_dev->lock); ipv6_create_tempaddr(ifp, NULL); @@ -2706,7 +2762,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) idev->dead = 1; /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, NULL); + RCU_INIT_POINTER(dev->ip6_ptr, NULL); /* Step 1.5: remove snmp6 entry */ snmp6_unregister_dev(idev); @@ -2969,12 +3025,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) ipv6_ifa_notify(RTM_NEWADDR, ifp); - /* If added prefix is link local and forwarding is off, - start sending router solicitations. + /* If added prefix is link local and we are prepared to process + router advertisements, start sending router solicitations. */ - if ((ifp->idev->cnf.forwarding == 0 || - ifp->idev->cnf.forwarding == 2) && + if (((ifp->idev->cnf.accept_ra == 1 && !ifp->idev->cnf.forwarding) || + ifp->idev->cnf.accept_ra == 2) && ifp->idev->cnf.rtr_solicits > 0 && (dev->flags&IFF_LOOPBACK) == 0 && (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { @@ -3012,20 +3068,39 @@ static void addrconf_dad_run(struct inet6_dev *idev) struct if6_iter_state { struct seq_net_private p; int bucket; + int offset; }; -static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) +static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) { struct inet6_ifaddr *ifa = NULL; struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + int p = 0; - for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { + /* initial bucket if pos is 0 */ + if (pos == 0) { + state->bucket = 0; + state->offset = 0; + } + + for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { struct hlist_node *n; hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], - addr_lst) + addr_lst) { + /* sync with offset */ + if (p < state->offset) { + p++; + continue; + } + state->offset++; if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; + } + + /* prepare for next bucket */ + state->offset = 0; + p = 0; } return NULL; } @@ -3037,13 +3112,17 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct hlist_node *n = &ifa->addr_lst; - hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) + hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) { + state->offset++; if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; + } while (++state->bucket < IN6_ADDR_HSIZE) { + state->offset = 0; hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], addr_lst) { + state->offset++; if (net_eq(dev_net(ifa->idev->dev), net)) return ifa; } @@ -3052,21 +3131,11 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, return NULL; } -static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) -{ - struct inet6_ifaddr *ifa = if6_get_first(seq); - - if (ifa) - while (pos && (ifa = if6_get_next(seq, ifa)) != NULL) - --pos; - return pos ? NULL : ifa; -} - static void *if6_seq_start(struct seq_file *seq, loff_t *pos) __acquires(rcu_bh) { rcu_read_lock_bh(); - return if6_get_idx(seq, *pos); + return if6_get_first(seq, *pos); } static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 6b03826552e1..399287e595d7 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -3,6 +3,7 @@ * not configured or static. */ +#include <linux/export.h> #include <net/ipv6.h> #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d27c797f9f05..273f48d1df2e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -347,7 +347,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (!inet->transparent && + if (!(inet->freebind || inet->transparent) && !ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { err = -EADDRNOTAVAIL; @@ -361,10 +361,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_rcv_saddr = v4addr; inet->inet_saddr = v4addr; - ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); + np->rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) - ipv6_addr_copy(&np->saddr, &addr->sin6_addr); + np->saddr = addr->sin6_addr; /* Make sure we are allowed to bind here. */ if (sk->sk_prot->get_port(sk, snum)) { @@ -458,14 +458,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, peer == 1) return -ENOTCONN; sin->sin6_port = inet->inet_dport; - ipv6_addr_copy(&sin->sin6_addr, &np->daddr); + sin->sin6_addr = np->daddr; if (np->sndflow) sin->sin6_flowinfo = np->flow_label; } else { if (ipv6_addr_any(&np->rcv_saddr)) - ipv6_addr_copy(&sin->sin6_addr, &np->saddr); + sin->sin6_addr = np->saddr; else - ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr); + sin->sin6_addr = np->rcv_saddr; sin->sin6_port = inet->inet_sport; } @@ -660,8 +660,8 @@ int inet6_sk_rebuild_header(struct sock *sk) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; - ipv6_addr_copy(&fl6.daddr, &np->daddr); - ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.daddr = np->daddr; + fl6.saddr = np->saddr; fl6.flowlabel = np->flow_label; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -769,7 +769,8 @@ out: return err; } -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features) +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct ipv6hdr *ipv6h; @@ -985,9 +986,9 @@ static int __net_init ipv6_init_mibs(struct net *net) sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics, - sizeof(struct icmpv6msg_mib), - __alignof__(struct icmpv6msg_mib)) < 0) + net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib), + GFP_KERNEL); + if (!net->mib.icmpv6msg_statistics) goto err_icmpmsg_mib; return 0; @@ -1008,7 +1009,7 @@ static void ipv6_cleanup_mibs(struct net *net) snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); - snmp_mib_free((void __percpu **)net->mib.icmpv6msg_statistics); + kfree(net->mib.icmpv6msg_statistics); } static int __net_init inet6_net_init(struct net *net) @@ -1115,6 +1116,8 @@ static int __init inet6_init(void) if (err) goto static_sysctl_fail; #endif + tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; + /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2195ae651923..2ae79dbeec2f 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -193,9 +193,9 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length); goto bad; } - ipv6_addr_copy(&final_addr, &hao->addr); - ipv6_addr_copy(&hao->addr, &iph->saddr); - ipv6_addr_copy(&iph->saddr, &final_addr); + final_addr = hao->addr; + hao->addr = iph->saddr; + iph->saddr = final_addr; } break; } @@ -241,13 +241,13 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr) segments = rthdr->hdrlen >> 1; addrs = ((struct rt0_hdr *)rthdr)->addr; - ipv6_addr_copy(&final_addr, addrs + segments - 1); + final_addr = addrs[segments - 1]; addrs += segments - segments_left; memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs)); - ipv6_addr_copy(addrs, &iph->daddr); - ipv6_addr_copy(&iph->daddr, &final_addr); + addrs[0] = iph->daddr; + iph->daddr = final_addr; } static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) @@ -324,8 +324,6 @@ static void ah6_output_done(struct crypto_async_request *base, int err) #endif } - err = ah->nexthdr; - kfree(AH_SKB_CB(skb)->tmp); xfrm_output_resume(skb, err); } @@ -466,12 +464,12 @@ static void ah6_input_done(struct crypto_async_request *base, int err) if (err) goto out; + err = ah->nexthdr; + skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, hdr_len); __skb_pull(skb, ah_hlen + hdr_len); skb_set_transport_header(skb, -hdr_len); - - err = ah->nexthdr; out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); @@ -583,8 +581,6 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (err == -EINPROGRESS) goto out; - if (err == -EBUSY) - err = NET_XMIT_DROP; goto out_free; } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 674255f5e6b7..59402b4637f9 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -75,7 +75,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (pac == NULL) return -ENOMEM; pac->acl_next = NULL; - ipv6_addr_copy(&pac->acl_addr, addr); + pac->acl_addr = *addr; rcu_read_lock(); if (ifindex == 0) { @@ -83,7 +83,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { - dev = rt->rt6i_dev; + dev = rt->dst.dev; dst_release(&rt->dst); } else if (ishost) { err = -EADDRNOTAVAIL; @@ -289,14 +289,14 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr) goto out; } - rt = addrconf_dst_alloc(idev, addr, 1); + rt = addrconf_dst_alloc(idev, addr, true); if (IS_ERR(rt)) { kfree(aca); err = PTR_ERR(rt); goto out; } - ipv6_addr_copy(&aca->aca_addr, addr); + aca->aca_addr = *addr; aca->aca_idev = idev; aca->aca_rt = rt; aca->aca_users = 1; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b46e9f88ce37..ae08aee1773c 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -71,7 +71,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); + usin->sin6_addr = flowlabel->dst; } } @@ -143,7 +143,7 @@ ipv4_connected: } } - ipv6_addr_copy(&np->daddr, daddr); + np->daddr = *daddr; np->flow_label = fl6.flowlabel; inet->inet_dport = usin->sin6_port; @@ -154,8 +154,8 @@ ipv4_connected: */ fl6.flowi6_proto = sk->sk_protocol; - ipv6_addr_copy(&fl6.daddr, &np->daddr); - ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.daddr = np->daddr; + fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; @@ -179,10 +179,10 @@ ipv4_connected: /* source address lookup done in ip6_dst_lookup */ if (ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&np->saddr, &fl6.saddr); + np->saddr = fl6.saddr; if (ipv6_addr_any(&np->rcv_saddr)) { - ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr); + np->rcv_saddr = fl6.saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); @@ -257,7 +257,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); - ipv6_addr_copy(&iph->daddr, &fl6->daddr); + iph->daddr = fl6->daddr; serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; @@ -294,20 +294,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); - ipv6_addr_copy(&iph->daddr, &fl6->daddr); + iph->daddr = fl6->daddr; mtu_info = IP6CBMTU(skb); - if (!mtu_info) { - kfree_skb(skb); - return; - } mtu_info->ip6m_mtu = mtu; mtu_info->ip6m_addr.sin6_family = AF_INET6; mtu_info->ip6m_addr.sin6_port = 0; mtu_info->ip6m_addr.sin6_flowinfo = 0; mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif; - ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); + mtu_info->ip6m_addr.sin6_addr = ipv6_hdr(skb)->daddr; __skb_pull(skb, skb_tail_pointer(skb) - skb->data); skb_reset_transport_header(skb); @@ -358,8 +354,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = serr->port; sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { - ipv6_addr_copy(&sin->sin6_addr, - (struct in6_addr *)(nh + serr->addr_offset)); + sin->sin6_addr = + *(struct in6_addr *)(nh + serr->addr_offset); if (np->sndflow) sin->sin6_flowinfo = (*(__be32 *)(nh + serr->addr_offset - 24) & @@ -380,7 +376,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { - ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr); + sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) datagram_recv_ctl(sk, msg, skb); if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) @@ -455,7 +451,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_flowinfo = 0; sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; - ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr); + sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); @@ -479,7 +475,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) struct in6_pktinfo src_info; src_info.ipi6_ifindex = opt->iif; - ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr); + src_info.ipi6_addr = ipv6_hdr(skb)->daddr; put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } @@ -554,7 +550,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) struct in6_pktinfo src_info; src_info.ipi6_ifindex = opt->iif; - ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr); + src_info.ipi6_addr = ipv6_hdr(skb)->daddr; put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { @@ -588,7 +584,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) */ sin6.sin6_family = AF_INET6; - ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr); + sin6.sin6_addr = ipv6_hdr(skb)->daddr; sin6.sin6_port = ports[1]; sin6.sin6_flowinfo = 0; sin6.sin6_scope_id = 0; @@ -658,12 +654,12 @@ int datagram_send_ctl(struct net *net, struct sock *sk, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; - if (!inet_sk(sk)->transparent && + if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) && !ipv6_chk_addr(net, &src_info->ipi6_addr, strict ? dev : NULL, 0)) err = -EINVAL; else - ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr); + fl6->saddr = src_info->ipi6_addr; } rcu_read_unlock(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 79a485e8a700..3d641b6e9b09 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -30,6 +30,7 @@ #include <linux/in6.h> #include <linux/icmpv6.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/dst.h> #include <net/sock.h> @@ -242,9 +243,9 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; - ipv6_addr_copy(&tmp_addr, &ipv6h->saddr); - ipv6_addr_copy(&ipv6h->saddr, &hao->addr); - ipv6_addr_copy(&hao->addr, &tmp_addr); + tmp_addr = ipv6h->saddr; + ipv6h->saddr = hao->addr; + hao->addr = tmp_addr; if (skb->tstamp.tv64 == 0) __net_timestamp(skb); @@ -273,12 +274,12 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; #endif - struct dst_entry *dst; + struct dst_entry *dst = skb_dst(skb); if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -289,9 +290,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) dstbuf = opt->dst1; #endif - dst = dst_clone(skb_dst(skb)); if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { - dst_release(dst); skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -304,7 +303,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); - dst_release(dst); return -1; } @@ -463,9 +461,9 @@ looped_back: return -1; } - ipv6_addr_copy(&daddr, addr); - ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr); - ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr); + daddr = *addr; + *addr = ipv6_hdr(skb)->daddr; + ipv6_hdr(skb)->daddr = daddr; skb_dst_drop(skb); ip6_route_input(skb); @@ -692,7 +690,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, memcpy(phdr->addr, ihdr->addr + 1, (hops - 1) * sizeof(struct in6_addr)); - ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p); + phdr->addr[hops - 1] = **addr_p; *addr_p = ihdr->addr; phdr->rt_hdr.nexthdr = *proto; @@ -890,8 +888,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, if (!opt || !opt->srcrt) return NULL; - ipv6_addr_copy(orig, &fl6->daddr); - ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr); + *orig = fl6->daddr; + fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; return orig; } diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 14ed0a955b56..72957f4a7c6c 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -2,6 +2,7 @@ * IPv6 library code, needed by static components when full IPv6 support is * not configured or static. */ +#include <linux/export.h> #include <net/ipv6.h> /* @@ -56,6 +57,9 @@ int ipv6_ext_hdr(u8 nexthdr) * it returns NULL. * - First fragment header is skipped, not-first ones * are considered as unparsable. + * - Reports the offset field of the final fragment header so it is + * possible to tell whether this is a first fragment, later fragment, + * or not fragmented. * - ESP is unparsable for now and considered like * normal payload protocol. * - Note also special handling of AUTH header. Thanks to IPsec wizards. @@ -63,10 +67,13 @@ int ipv6_ext_hdr(u8 nexthdr) * --ANK (980726) */ -int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp) +int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, + __be16 *frag_offp) { u8 nexthdr = *nexthdrp; + *frag_offp = 0; + while (ipv6_ext_hdr(nexthdr)) { struct ipv6_opt_hdr _hdr, *hp; int hdrlen; @@ -86,7 +93,8 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp) if (fp == NULL) return -1; - if (ntohs(*fp) & ~0x7) + *frag_offp = *fp; + if (ntohs(*frag_offp) & ~0x7) break; hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 34d244df907d..b6c573152067 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -14,6 +14,7 @@ */ #include <linux/netdevice.h> +#include <linux/export.h> #include <net/fib_rules.h> #include <net/ipv6.h> @@ -95,7 +96,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen)) goto again; - ipv6_addr_copy(&flp6->saddr, &saddr); + flp6->saddr = saddr; } goto out; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 11900417b1cc..01d46bff63c3 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -135,11 +135,12 @@ static int is_ineligible(struct sk_buff *skb) int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; int len = skb->len - ptr; __u8 nexthdr = ipv6_hdr(skb)->nexthdr; + __be16 frag_off; if (len < 0) return 1; - ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr); + ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off); if (ptr < 0) return 0; if (nexthdr == IPPROTO_ICMPV6) { @@ -290,9 +291,9 @@ static void mip6_addr_swap(struct sk_buff *skb) if (likely(off >= 0)) { hao = (struct ipv6_destopt_hao *) (skb_network_header(skb) + off); - ipv6_addr_copy(&tmp, &iph->saddr); - ipv6_addr_copy(&iph->saddr, &hao->addr); - ipv6_addr_copy(&hao->addr, &tmp); + tmp = iph->saddr; + iph->saddr = hao->addr; + hao->addr = tmp; } } } @@ -444,9 +445,9 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_ICMPV6; - ipv6_addr_copy(&fl6.daddr, &hdr->saddr); + fl6.daddr = hdr->saddr; if (saddr) - ipv6_addr_copy(&fl6.saddr, saddr); + fl6.saddr = *saddr; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -490,7 +491,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) goto out_dst_release; } - idev = in6_dev_get(skb->dev); + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), @@ -500,19 +502,16 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); - goto out_put; + } else { + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + len + sizeof(struct icmp6hdr)); } - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); - -out_put: - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(sk); } - EXPORT_SYMBOL(icmpv6_send); static void icmpv6_echo_reply(struct sk_buff *skb) @@ -540,9 +539,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_ICMPV6; - ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr); + fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) - ipv6_addr_copy(&fl6.saddr, saddr); + fl6.saddr = *saddr; fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -569,7 +568,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; @@ -583,13 +582,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); - goto out_put; + } else { + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + skb->len + sizeof(struct icmp6hdr)); } - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); - -out_put: - if (likely(idev != NULL)) - in6_dev_put(idev); dst_release(dst); out: icmpv6_xmit_unlock(sk); @@ -601,6 +597,7 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) int inner_offset; int hash; u8 nexthdr; + __be16 frag_off; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return; @@ -608,7 +605,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr; if (ipv6_ext_hdr(nexthdr)) { /* now skip over extension headers */ - inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr); + inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); if (inner_offset<0) return; } else { @@ -791,8 +789,8 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, int oif) { memset(fl6, 0, sizeof(*fl6)); - ipv6_addr_copy(&fl6->saddr, saddr); - ipv6_addr_copy(&fl6->daddr, daddr); + fl6->saddr = *saddr; + fl6->daddr = *daddr; fl6->flowi6_proto = IPPROTO_ICMPV6; fl6->fl6_icmp_type = type; fl6->fl6_icmp_code = 0; @@ -840,8 +838,7 @@ static int __net_init icmpv6_sk_init(struct net *net) /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ - sk->sk_sndbuf = - (2 * ((64 * 1024) + sizeof(struct sk_buff))); + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); } return 0; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a58e8cf6646..02dd203d9eac 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -65,9 +65,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + fl6.daddr = treq->rmt_addr; final_p = fl6_update_dst(&fl6, np->opt, &final); - ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); + fl6.saddr = treq->loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; @@ -85,7 +85,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, * request_sock (formerly open request) hash tables. */ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, - const u32 rnd, const u16 synq_hsize) + const u32 rnd, const u32 synq_hsize) { u32 c; @@ -157,7 +157,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; sin6->sin6_family = AF_INET6; - ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); + sin6->sin6_addr = np->daddr; sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; @@ -211,11 +211,12 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) struct flowi6 fl6; struct dst_entry *dst; struct in6_addr *final_p, final; + int res; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; - ipv6_addr_copy(&fl6.daddr, &np->daddr); - ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.daddr = np->daddr; + fl6.saddr = np->saddr; fl6.flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl6.flowlabel); fl6.flowi6_oif = sk->sk_bound_dev_if; @@ -241,12 +242,14 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) __inet6_csk_dst_store(sk, dst, NULL, NULL); } - skb_dst_set(skb, dst_clone(dst)); + rcu_read_lock(); + skb_dst_set_noref(skb, dst); /* Restore final destination back after routing done */ - ipv6_addr_copy(&fl6.daddr, &np->daddr); + fl6.daddr = np->daddr; - return ip6_xmit(sk, skb, &fl6, np->opt); + res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + rcu_read_unlock(); + return res; } - EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 320d91d20ad7..b82bcde53f7a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -28,10 +28,6 @@ #include <linux/list.h> #include <linux/slab.h> -#ifdef CONFIG_PROC_FS -#include <linux/proc_fs.h> -#endif - #include <net/ipv6.h> #include <net/ndisc.h> #include <net/addrconf.h> @@ -194,7 +190,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) struct fib6_table *table; table = kzalloc(sizeof(*table), GFP_ATOMIC); - if (table != NULL) { + if (table) { table->tb6_id = id; table->tb6_root.leaf = net->ipv6.ip6_null_entry; table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; @@ -214,7 +210,7 @@ struct fib6_table *fib6_new_table(struct net *net, u32 id) return tb; tb = fib6_alloc_table(net, id); - if (tb != NULL) + if (tb) fib6_link_table(net, tb); return tb; @@ -371,7 +367,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) s_e = cb->args[1]; w = (void *)cb->args[2]; - if (w == NULL) { + if (!w) { /* New dump: * * 1. hook callback destructor. @@ -383,7 +379,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) * 2. allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); - if (w == NULL) + if (!w) return -ENOMEM; w->func = fib6_dump_node; cb->args[2] = (long)w; @@ -429,7 +425,8 @@ out: static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, int addrlen, int plen, - int offset) + int offset, int allow_create, + int replace_required) { struct fib6_node *fn, *in, *ln; struct fib6_node *pn = NULL; @@ -451,8 +448,18 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, * Prefix match */ if (plen < fn->fn_bit || - !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) + !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) { + if (!allow_create) { + if (replace_required) { + pr_warn("IPv6: Can't replace route, " + "no match found\n"); + return ERR_PTR(-ENOENT); + } + pr_warn("IPv6: NLM_F_CREATE should be set " + "when creating new route\n"); + } goto insert_above; + } /* * Exact match ? @@ -460,7 +467,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, if (plen == fn->fn_bit) { /* clean up an intermediate node */ - if ((fn->fn_flags & RTN_RTINFO) == 0) { + if (!(fn->fn_flags & RTN_RTINFO)) { rt6_release(fn->leaf); fn->leaf = NULL; } @@ -481,6 +488,23 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, fn = dir ? fn->right: fn->left; } while (fn); + if (!allow_create) { + /* We should not create new node because + * NLM_F_REPLACE was specified without NLM_F_CREATE + * I assume it is safe to require NLM_F_CREATE when + * REPLACE flag is used! Later we may want to remove the + * check for replace_required, because according + * to netlink specification, NLM_F_CREATE + * MUST be specified if new route is created. + * That would keep IPv6 consistent with IPv4 + */ + if (replace_required) { + pr_warn("IPv6: Can't replace route, no match found\n"); + return ERR_PTR(-ENOENT); + } + pr_warn("IPv6: NLM_F_CREATE should be set " + "when creating new route\n"); + } /* * We walked to the bottom of tree. * Create new leaf node without children. @@ -488,7 +512,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, ln = node_alloc(); - if (ln == NULL) + if (!ln) return NULL; ln->fn_bit = plen; @@ -531,7 +555,7 @@ insert_above: in = node_alloc(); ln = node_alloc(); - if (in == NULL || ln == NULL) { + if (!in || !ln) { if (in) node_free(in); if (ln) @@ -585,7 +609,7 @@ insert_above: ln = node_alloc(); - if (ln == NULL) + if (!ln) return NULL; ln->fn_bit = plen; @@ -618,10 +642,15 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, { struct rt6_info *iter = NULL; struct rt6_info **ins; + int replace = (info->nlh && + (info->nlh->nlmsg_flags & NLM_F_REPLACE)); + int add = (!info->nlh || + (info->nlh->nlmsg_flags & NLM_F_CREATE)); + int found = 0; ins = &fn->leaf; - for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) { + for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { /* * Search for duplicates */ @@ -630,17 +659,24 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* * Same priority level */ + if (info->nlh && + (info->nlh->nlmsg_flags & NLM_F_EXCL)) + return -EEXIST; + if (replace) { + found++; + break; + } - if (iter->rt6i_dev == rt->rt6i_dev && + if (iter->dst.dev == rt->dst.dev && iter->rt6i_idev == rt->rt6i_idev && ipv6_addr_equal(&iter->rt6i_gateway, &rt->rt6i_gateway)) { - if (!(iter->rt6i_flags&RTF_EXPIRES)) + if (!(iter->rt6i_flags & RTF_EXPIRES)) return -EEXIST; - iter->rt6i_expires = rt->rt6i_expires; - if (!(rt->rt6i_flags&RTF_EXPIRES)) { + iter->dst.expires = rt->dst.expires; + if (!(rt->rt6i_flags & RTF_EXPIRES)) { iter->rt6i_flags &= ~RTF_EXPIRES; - iter->rt6i_expires = 0; + iter->dst.expires = 0; } return -EEXIST; } @@ -659,17 +695,40 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* * insert node */ + if (!replace) { + if (!add) + pr_warn("IPv6: NLM_F_CREATE should be set when creating new route\n"); + +add: + rt->dst.rt6_next = iter; + *ins = rt; + rt->rt6i_node = fn; + atomic_inc(&rt->rt6i_ref); + inet6_rt_notify(RTM_NEWROUTE, rt, info); + info->nl_net->ipv6.rt6_stats->fib_rt_entries++; + + if (!(fn->fn_flags & RTN_RTINFO)) { + info->nl_net->ipv6.rt6_stats->fib_route_nodes++; + fn->fn_flags |= RTN_RTINFO; + } - rt->dst.rt6_next = iter; - *ins = rt; - rt->rt6i_node = fn; - atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info); - info->nl_net->ipv6.rt6_stats->fib_rt_entries++; - - if ((fn->fn_flags & RTN_RTINFO) == 0) { - info->nl_net->ipv6.rt6_stats->fib_route_nodes++; - fn->fn_flags |= RTN_RTINFO; + } else { + if (!found) { + if (add) + goto add; + pr_warn("IPv6: NLM_F_REPLACE set, but no existing node found!\n"); + return -ENOENT; + } + *ins = rt; + rt->rt6i_node = fn; + rt->dst.rt6_next = iter->dst.rt6_next; + atomic_inc(&rt->rt6i_ref); + inet6_rt_notify(RTM_NEWROUTE, rt, info); + rt6_release(iter); + if (!(fn->fn_flags & RTN_RTINFO)) { + info->nl_net->ipv6.rt6_stats->fib_route_nodes++; + fn->fn_flags |= RTN_RTINFO; + } } return 0; @@ -678,7 +737,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && - (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) + (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } @@ -700,11 +759,28 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; + int allow_create = 1; + int replace_required = 0; + + if (info->nlh) { + if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) + allow_create = 0; + if (info->nlh->nlmsg_flags & NLM_F_REPLACE) + replace_required = 1; + } + if (!allow_create && !replace_required) + pr_warn("IPv6: RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), - rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst)); + rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), + allow_create, replace_required); - if (fn == NULL) + if (IS_ERR(fn)) { + err = PTR_ERR(fn); + fn = NULL; + } + + if (!fn) goto out; pn = fn; @@ -713,7 +789,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (rt->rt6i_src.plen) { struct fib6_node *sn; - if (fn->subtree == NULL) { + if (!fn->subtree) { struct fib6_node *sfn; /* @@ -728,7 +804,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) /* Create subtree root node */ sfn = node_alloc(); - if (sfn == NULL) + if (!sfn) goto st_failure; sfn->leaf = info->nl_net->ipv6.ip6_null_entry; @@ -740,9 +816,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) sn = fib6_add_1(sfn, &rt->rt6i_src.addr, sizeof(struct in6_addr), rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src)); + offsetof(struct rt6_info, rt6i_src), + allow_create, replace_required); - if (sn == NULL) { + if (!sn) { /* If it is failed, discard just allocated root, and then (in st_failure) stale node in main tree. @@ -757,13 +834,18 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) } else { sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, sizeof(struct in6_addr), rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src)); + offsetof(struct rt6_info, rt6i_src), + allow_create, replace_required); - if (sn == NULL) + if (IS_ERR(sn)) { + err = PTR_ERR(sn); + sn = NULL; + } + if (!sn) goto st_failure; } - if (fn->leaf == NULL) { + if (!fn->leaf) { fn->leaf = rt; atomic_inc(&rt->rt6i_ref); } @@ -772,10 +854,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) #endif err = fib6_add_rt2node(fn, rt, info); - - if (err == 0) { + if (!err) { fib6_start_gc(info->nl_net, rt); - if (!(rt->rt6i_flags&RTF_CACHE)) + if (!(rt->rt6i_flags & RTF_CACHE)) fib6_prune_clones(info->nl_net, pn, rt); } @@ -823,7 +904,7 @@ st_failure: */ struct lookup_args { - int offset; /* key offset on rt6_info */ + int offset; /* key offset on rt6_info */ const struct in6_addr *addr; /* search key */ }; @@ -853,11 +934,10 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, fn = next; continue; } - break; } - while(fn) { + while (fn) { if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { struct rt6key *key; @@ -904,8 +984,7 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da }; fn = fib6_lookup_1(root, daddr ? args : args + 1); - - if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) + if (!fn || fn->fn_flags & RTN_TL_ROOT) fn = root; return fn; @@ -965,7 +1044,7 @@ struct fib6_node * fib6_locate(struct fib6_node *root, } #endif - if (fn && fn->fn_flags&RTN_RTINFO) + if (fn && fn->fn_flags & RTN_RTINFO) return fn; return NULL; @@ -979,14 +1058,13 @@ struct fib6_node * fib6_locate(struct fib6_node *root, static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) { - if (fn->fn_flags&RTN_ROOT) + if (fn->fn_flags & RTN_ROOT) return net->ipv6.ip6_null_entry; - while(fn) { - if(fn->left) + while (fn) { + if (fn->left) return fn->left->leaf; - - if(fn->right) + if (fn->right) return fn->right->leaf; fn = FIB6_SUBTREE(fn); @@ -1024,12 +1102,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net, if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES /* Subtree root (i.e. fn) may have one child */ - || (children && fn->fn_flags&RTN_ROOT) + || (children && fn->fn_flags & RTN_ROOT) #endif ) { fn->leaf = fib6_find_prefix(net, fn); #if RT6_DEBUG >= 2 - if (fn->leaf==NULL) { + if (!fn->leaf) { WARN_ON(!fn->leaf); fn->leaf = net->ipv6.ip6_null_entry; } @@ -1062,7 +1140,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, read_lock(&fib6_walker_lock); FOR_WALKERS(w) { - if (child == NULL) { + if (!child) { if (w->root == fn) { w->root = w->node = NULL; RT6_TRACE("W %p adjusted by delroot 1\n", w); @@ -1091,7 +1169,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, read_unlock(&fib6_walker_lock); node_free(fn); - if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn)) + if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) return pn; rt6_release(pn->leaf); @@ -1125,7 +1203,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); w->leaf = rt->dst.rt6_next; - if (w->leaf == NULL) + if (!w->leaf) w->state = FWS_U; } } @@ -1134,7 +1212,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, rt->dst.rt6_next = NULL; /* If it was last route, expunge its radix tree node */ - if (fn->leaf == NULL) { + if (!fn->leaf) { fn->fn_flags &= ~RTN_RTINFO; net->ipv6.rt6_stats->fib_route_nodes--; fn = fib6_repair_tree(net, fn); @@ -1148,7 +1226,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, * to still alive ones. */ while (fn) { - if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) { + if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { fn->leaf = fib6_find_prefix(net, fn); atomic_inc(&fn->leaf->rt6i_ref); rt6_release(rt); @@ -1175,17 +1253,17 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) return -ENOENT; } #endif - if (fn == NULL || rt == net->ipv6.ip6_null_entry) + if (!fn || rt == net->ipv6.ip6_null_entry) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - if (!(rt->rt6i_flags&RTF_CACHE)) { + if (!(rt->rt6i_flags & RTF_CACHE)) { struct fib6_node *pn = fn; #ifdef CONFIG_IPV6_SUBTREES /* clones of this route might be in another subtree */ if (rt->rt6i_src.plen) { - while (!(pn->fn_flags&RTN_ROOT)) + while (!(pn->fn_flags & RTN_ROOT)) pn = pn->parent; pn = pn->parent; } @@ -1236,11 +1314,11 @@ static int fib6_walk_continue(struct fib6_walker_t *w) for (;;) { fn = w->node; - if (fn == NULL) + if (!fn) return 0; if (w->prune && fn != w->root && - fn->fn_flags&RTN_RTINFO && w->state < FWS_C) { + fn->fn_flags & RTN_RTINFO && w->state < FWS_C) { w->state = FWS_C; w->leaf = fn->leaf; } @@ -1269,7 +1347,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->state = FWS_C; w->leaf = fn->leaf; case FWS_C: - if (w->leaf && fn->fn_flags&RTN_RTINFO) { + if (w->leaf && fn->fn_flags & RTN_RTINFO) { int err; if (w->count < w->skip) { @@ -1384,6 +1462,26 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, fib6_walk(&c.w); } +void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) +{ + struct fib6_table *table; + struct hlist_node *node; + struct hlist_head *head; + unsigned int h; + + rcu_read_lock(); + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + read_lock_bh(&table->tb6_lock); + fib6_clean_tree(net, &table->tb6_root, + func, prune, arg); + read_unlock_bh(&table->tb6_lock); + } + } + rcu_read_unlock(); +} void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { @@ -1443,8 +1541,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) * only if they are not in use now. */ - if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) { - if (time_after(now, rt->rt6i_expires)) { + if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { + if (time_after(now, rt->dst.expires)) { RT6_TRACE("expiring %p\n", rt); return -1; } @@ -1455,7 +1553,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) { + (!(dst_get_neighbour_noref_raw(&rt->dst)->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 543039450193..b7867a1215b1 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -21,6 +21,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -385,7 +386,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -EINVAL; goto done; } - ipv6_addr_copy(&fl->dst, &freq->flr_dst); + fl->dst = freq->flr_dst; atomic_set(&fl->users, 1); switch (fl->share) { case IPV6_FL_S_EXCL: diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 027c7ff6f1e5..1ca5d45a12e8 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -111,6 +111,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt ipv6_addr_loopback(&hdr->daddr)) goto err; + /* + * RFC4291 2.7 + * Multicast addresses must not be used as source addresses in IPv6 + * packets or appear in any Routing header. + */ + if (ipv6_addr_is_multicast(&hdr->saddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); @@ -272,6 +280,7 @@ int ip6_mc_input(struct sk_buff *skb) u8 *ptr = skb_network_header(skb) + opt->ra; struct icmp6hdr *icmp6; u8 nexthdr = hdr->nexthdr; + __be16 frag_off; int offset; /* Check if the value of Router Alert @@ -285,7 +294,7 @@ int ip6_mc_input(struct sk_buff *skb) goto out; } offset = ipv6_skip_exthdr(skb, sizeof(*hdr), - &nexthdr); + &nexthdr, &frag_off); if (offset < 0) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4c882cf4e8a1..d97e07183ce9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -136,7 +136,7 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock(); - neigh = dst_get_neighbour(dst); + neigh = dst_get_neighbour_noref(dst); if (neigh) { int res = neigh_output(neigh, skb); @@ -180,7 +180,7 @@ int ip6_output(struct sk_buff *skb) */ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, - struct ipv6_txoptions *opt) + struct ipv6_txoptions *opt, int tclass) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -190,7 +190,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; - int tclass = 0; u32 mtu; if (opt) { @@ -228,10 +227,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, /* * Fill in the IPv6 header */ - if (np) { - tclass = np->tclass; + if (np) hlimit = np->hop_limit; - } if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); @@ -241,8 +238,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hdr->nexthdr = proto; hdr->hop_limit = hlimit; - ipv6_addr_copy(&hdr->saddr, &fl6->saddr); - ipv6_addr_copy(&hdr->daddr, first_hop); + hdr->saddr = fl6->saddr; + hdr->daddr = *first_hop; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -293,8 +290,8 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, hdr->nexthdr = proto; hdr->hop_limit = np->hop_limit; - ipv6_addr_copy(&hdr->saddr, saddr); - ipv6_addr_copy(&hdr->daddr, daddr); + hdr->saddr = *saddr; + hdr->daddr = *daddr; return 0; } @@ -332,10 +329,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) { struct ipv6hdr *hdr = ipv6_hdr(skb); u8 nexthdr = hdr->nexthdr; + __be16 frag_off; int offset; if (ipv6_ext_hdr(nexthdr)) { - offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); + offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); if (offset < 0) return 0; } else @@ -465,7 +463,7 @@ int ip6_forward(struct sk_buff *skb) send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. */ - n = dst_get_neighbour(dst); + n = dst_get_neighbour_noref(dst); if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; @@ -606,7 +604,7 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) static atomic_t ipv6_fragmentation_id; int old, new; - if (rt) { + if (rt && !(rt->dst.flags & DST_NOPEER)) { struct inet_peer *peer; if (!rt->rt6i_peer) @@ -634,6 +632,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; + int hroom, troom; __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; @@ -800,6 +799,8 @@ slow_path: */ *prevhdr = NEXTHDR_FRAGMENT; + hroom = LL_RESERVED_SPACE(rt->dst.dev); + troom = rt->dst.dev->needed_tailroom; /* * Keep copying data until we run out. @@ -818,7 +819,8 @@ slow_path: * Allocate buffer. */ - if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { + if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + + hroom + troom, GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); @@ -831,7 +833,7 @@ slow_path: */ ip6_copy_metadata(frag, skb); - skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); + skb_reserve(frag, hroom); skb_put(frag, len + hlen + sizeof(struct frag_hdr)); skb_reset_network_header(frag); fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); @@ -981,7 +983,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry of the nexthop router */ rcu_read_lock(); - n = dst_get_neighbour(*dst); + n = dst_get_neighbour_noref(*dst); if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; @@ -1062,7 +1064,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (err) return ERR_PTR(err); if (final_dst) - ipv6_addr_copy(&fl6->daddr, final_dst); + fl6->daddr = *final_dst; if (can_sleep) fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; @@ -1098,7 +1100,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (err) return ERR_PTR(err); if (final_dst) - ipv6_addr_copy(&fl6->daddr, final_dst); + fl6->daddr = *final_dst; if (can_sleep) fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; @@ -1126,7 +1128,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); if (skb == NULL) - return -ENOMEM; + return err; /* reserve space for Hardware header */ skb_reserve(skb, hh_len); @@ -1193,6 +1195,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct sk_buff *skb; unsigned int maxfraglen, fragheaderlen; int exthdrlen; + int dst_exthdrlen; int hh_len; int mtu; int copy; @@ -1248,7 +1251,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + rt->dst.dev->mtu : dst_mtu(&rt->dst); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; @@ -1259,16 +1262,17 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, cork->length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - - rt->rt6i_nfheader_len; + exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; + dst_exthdrlen = rt->dst.header_len; } else { rt = (struct rt6_info *)cork->dst; fl6 = &inet->cork.fl.u.ip6; opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; + dst_exthdrlen = 0; mtu = cork->fragsize; } @@ -1368,6 +1372,8 @@ alloc_new_skb: else alloclen = datalen + fragheaderlen; + alloclen += dst_exthdrlen; + /* * The last fragment gets additional space at tail. * Note: we overallocate on fragments with MSG_MODE @@ -1419,9 +1425,9 @@ alloc_new_skb: /* * Find where to start putting bytes */ - data = skb_put(skb, fraglen); - skb_set_network_header(skb, exthdrlen); - data += fragheaderlen; + data = skb_put(skb, fraglen + dst_exthdrlen); + skb_set_network_header(skb, exthdrlen + dst_exthdrlen); + data += fragheaderlen + dst_exthdrlen; skb->transport_header = (skb->network_header + fragheaderlen); if (fraggap) { @@ -1434,6 +1440,7 @@ alloc_new_skb: pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; + if (copy < 0) { err = -EINVAL; kfree_skb(skb); @@ -1448,6 +1455,7 @@ alloc_new_skb: length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; + dst_exthdrlen = 0; csummode = CHECKSUM_NONE; /* @@ -1480,13 +1488,13 @@ alloc_new_skb: if (page && (left = PAGE_SIZE - off) > 0) { if (copy >= left) copy = left; - if (page != frag->page) { + if (page != skb_frag_page(frag)) { if (i == MAX_SKB_FRAGS) { err = -EMSGSIZE; goto error; } - get_page(page); skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); + skb_frag_ref(skb, i); frag = &skb_shinfo(skb)->frags[i]; } } else if(i < MAX_SKB_FRAGS) { @@ -1506,12 +1514,14 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { + if (getfrag(from, + skb_frag_address(frag) + skb_frag_size(frag), + offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } sk->sk_sndmsg_off += copy; - frag->size += copy; + skb_frag_size_add(frag, copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1583,7 +1593,7 @@ int ip6_push_pending_frames(struct sock *sk) if (np->pmtudisc < IPV6_PMTUDISC_DO) skb->local_df = 1; - ipv6_addr_copy(final_dst, &fl6->daddr); + *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -1599,8 +1609,8 @@ int ip6_push_pending_frames(struct sock *sk) hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; - ipv6_addr_copy(&hdr->saddr, &fl6->saddr); - ipv6_addr_copy(&hdr->daddr, final_dst); + hdr->saddr = fl6->saddr; + hdr->daddr = *final_dst; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0bc98886c383..aa21da6a09cd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -93,7 +93,7 @@ struct pcpu_tstats { unsigned long rx_bytes; unsigned long tx_packets; unsigned long tx_bytes; -}; +} __attribute__((aligned(4*sizeof(unsigned long)))); static struct net_device_stats *ip6_get_stats(struct net_device *dev) { @@ -289,6 +289,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) if ((err = register_netdevice(dev)) < 0) goto failed_free; + strcpy(t->parms.name, dev->name); + dev_hold(dev); ip6_tnl_link(ip6n, t); return t; @@ -350,7 +352,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) - rcu_assign_pointer(ip6n->tnls_wc[0], NULL); + RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else ip6_tnl_unlink(ip6n, t); ip6_tnl_dst_reset(t); @@ -651,8 +653,8 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); - if (rt && rt->rt6i_dev) - skb2->dev = rt->rt6i_dev; + if (rt && rt->dst.dev) + skb2->dev = rt->dst.dev; icmpv6_send(skb2, rel_type, rel_code, rel_info); @@ -889,7 +891,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6_tel_txoption opt; - struct dst_entry *dst; + struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; int mtu; unsigned int max_headroom = sizeof(struct ipv6hdr); @@ -897,19 +899,20 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int err = -1; int pkt_len; - if ((dst = ip6_tnl_dst_check(t)) != NULL) - dst_hold(dst); - else { - dst = ip6_route_output(net, NULL, fl6); + if (!fl6->flowi6_mark) + dst = ip6_tnl_dst_check(t); + if (!dst) { + ndst = ip6_route_output(net, NULL, fl6); - if (dst->error) + if (ndst->error) goto tx_err_link_failure; - dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - dst = NULL; + ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + ndst = NULL; goto tx_err_link_failure; } + dst = ndst; } tdev = dst->dev; @@ -955,8 +958,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb = new_skb; } skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(dst)); - + if (fl6->flowi6_mark) { + skb_dst_set(skb, dst); + ndst = NULL; + } else { + skb_dst_set_noref(skb, dst); + } skb->transport_header = skb->network_header; proto = fl6->flowi6_proto; @@ -972,8 +979,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; - ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr); - ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr); + ipv6h->saddr = fl6->saddr; + ipv6h->daddr = fl6->daddr; nf_reset(skb); pkt_len = skb->len; err = ip6_local_out(skb); @@ -987,13 +994,14 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, stats->tx_errors++; stats->tx_aborted_errors++; } - ip6_tnl_dst_store(t, dst); + if (ndst) + ip6_tnl_dst_store(t, ndst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(dst); + dst_release(ndst); return err; } @@ -1020,9 +1028,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) dsfield = ipv4_get_dsfield(iph); - if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { @@ -1069,10 +1079,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); - if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); - if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { @@ -1143,8 +1155,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); /* Set up flowi template */ - ipv6_addr_copy(&fl6->saddr, &p->laddr); - ipv6_addr_copy(&fl6->daddr, &p->raddr); + fl6->saddr = p->laddr; + fl6->daddr = p->raddr; fl6->flowi6_oif = p->link; fl6->flowlabel = 0; @@ -1173,11 +1185,11 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (rt == NULL) return; - if (rt->rt6i_dev) { - dev->hard_header_len = rt->rt6i_dev->hard_header_len + + if (rt->dst.dev) { + dev->hard_header_len = rt->dst.dev->hard_header_len + sizeof (struct ipv6hdr); - dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr); + dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr); if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu-=8; @@ -1200,8 +1212,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) static int ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) { - ipv6_addr_copy(&t->parms.laddr, &p->laddr); - ipv6_addr_copy(&t->parms.raddr, &p->raddr); + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; t->parms.flags = p->flags; t->parms.hop_limit = p->hop_limit; t->parms.encap_limit = p->encap_limit; @@ -1397,7 +1409,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); t->dev = dev; - strcpy(t->parms.name, dev->name); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; @@ -1477,6 +1488,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) static int __net_init ip6_tnl_init_net(struct net *net) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct ip6_tnl *t = NULL; int err; ip6n->tnls[0] = ip6n->tnls_wc; @@ -1497,6 +1509,10 @@ static int __net_init ip6_tnl_init_net(struct net *net) err = register_netdev(ip6n->fb_tnl_dev); if (err < 0) goto err_register; + + t = netdev_priv(ip6n->fb_tnl_dev); + + strcpy(t->parms.name, ip6n->fb_tnl_dev->name); return 0; err_register: diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index def0538e2413..c7e95c8c579f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -51,6 +51,7 @@ #include <linux/pim.h> #include <net/addrconf.h> #include <linux/netfilter_ipv6.h> +#include <linux/export.h> #include <net/ip6_checksum.h> struct mr6_table { @@ -1104,8 +1105,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, msg->im6_msgtype = MRT6MSG_WHOLEPKT; msg->im6_mif = mrt->mroute_reg_vif_num; msg->im6_pad = 0; - ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); - ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); + msg->im6_src = ipv6_hdr(pkt)->saddr; + msg->im6_dst = ipv6_hdr(pkt)->daddr; skb->ip_summed = CHECKSUM_UNNECESSARY; } else @@ -1130,8 +1131,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, msg->im6_msgtype = assert; msg->im6_mif = mifi; msg->im6_pad = 0; - ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); - ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); + msg->im6_src = ipv6_hdr(pkt)->saddr; + msg->im6_dst = ipv6_hdr(pkt)->daddr; skb_dst_set(skb, dst_clone(skb_dst(pkt))); skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -2180,8 +2181,8 @@ int ip6mr_get_route(struct net *net, iph->payload_len = 0; iph->nexthdr = IPPROTO_NONE; iph->hop_limit = 0; - ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); - ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); + iph->saddr = rt->rt6i_src.addr; + iph->daddr = rt->rt6i_dst.addr; err = ip6mr_cache_unresolved(mrt, vif, skb2); read_unlock(&mrt_lock); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2fbda5fc4cc4..18a2719003c3 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -343,7 +343,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; case IPV6_TRANSPARENT: - if (!capable(CAP_NET_ADMIN)) { + if (valbool && !capable(CAP_NET_ADMIN) && !capable(CAP_NET_RAW)) { retv = -EPERM; break; } @@ -435,7 +435,7 @@ sticky_done: goto e_inval; np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; - ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr); + np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr; retv = 0; break; } @@ -503,7 +503,7 @@ done: goto e_inval; if (val > 255 || val < -1) goto e_inval; - np->mcast_hops = val; + np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); retv = 0; break; @@ -980,8 +980,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) : - ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr)); + src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { @@ -992,8 +991,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) : - ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr)); + src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ee7839f4d6e3..b853f06cc148 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -155,14 +155,14 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -ENOMEM; mc_lst->next = NULL; - ipv6_addr_copy(&mc_lst->addr, addr); + mc_lst->addr = *addr; rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { - dev = rt->rt6i_dev; + dev = rt->dst.dev; dst_release(&rt->dst); } } else @@ -256,7 +256,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); if (rt) { - dev = rt->rt6i_dev; + dev = rt->dst.dev; dev_hold(dev); dst_release(&rt->dst); } @@ -858,7 +858,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); - ipv6_addr_copy(&mc->mca_addr, addr); + mc->mca_addr = *addr; mc->idev = idev; /* (reference taken) */ mc->mca_users = 1; /* mca_stamp should be updated upon changes */ @@ -1343,13 +1343,15 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) struct mld2_report *pmr; struct in6_addr addr_buf; const struct in6_addr *saddr; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; int err; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - size += LL_ALLOCATED_SPACE(dev); + size += hlen + tlen; /* limit our allocations to order-0 page */ size = min_t(int, size, SKB_MAX_ORDER(0, 0)); skb = sock_alloc_send_skb(sk, size, 1, &err); @@ -1357,7 +1359,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) if (!skb) return NULL; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: @@ -1408,18 +1410,11 @@ static void mld_sendpack(struct sk_buff *skb) csum_partial(skb_transport_header(skb), mldlen, 0)); - dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); - - if (!dst) { - err = -ENOMEM; - goto err_out; - } - icmpv6_flow_init(net->ipv6.igmp_sk, &fl6, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); + dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); err = 0; if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -1723,6 +1718,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) struct mld_msg *hdr; const struct in6_addr *snd_addr, *saddr; struct in6_addr addr_buf; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, @@ -1744,7 +1741,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPSTATS_MIB_OUT, full_len); rcu_read_unlock(); - skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err); + skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); if (skb == NULL) { rcu_read_lock(); @@ -1754,7 +1751,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) return; } - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: @@ -1772,7 +1769,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg)); memset(hdr, 0, sizeof(struct mld_msg)); hdr->mld_type = type; - ipv6_addr_copy(&hdr->mld_mca, addr); + hdr->mld_mca = *addr; hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len, IPPROTO_ICMPV6, @@ -1781,17 +1778,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) rcu_read_lock(); idev = __in6_dev_get(skb->dev); - dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); - if (!dst) { - err = -ENOMEM; - goto err_out; - } - icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + dst = icmp6_dst_alloc(skb->dev, NULL, &fl6); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err_out; @@ -1914,7 +1904,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, * Add multicast single-source filter to the interface list */ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, - const struct in6_addr *psfsrc, int delta) + const struct in6_addr *psfsrc) { struct ip6_sf_list *psf, *psf_prev; @@ -2045,7 +2035,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_sfcount[sfmode]++; err = 0; for (i=0; i<sfcount; i++) { - err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i], delta); + err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 43242e6e6103..7e1e0fbfef21 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -195,8 +195,8 @@ static inline int mip6_report_rl_allow(struct timeval *stamp, mip6_report_rl.stamp.tv_sec = stamp->tv_sec; mip6_report_rl.stamp.tv_usec = stamp->tv_usec; mip6_report_rl.iif = iif; - ipv6_addr_copy(&mip6_report_rl.src, src); - ipv6_addr_copy(&mip6_report_rl.dst, dst); + mip6_report_rl.src = *src; + mip6_report_rl.dst = *dst; allow = 1; } spin_unlock_bh(&mip6_report_rl.lock); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9da6e02eaaeb..d8f02ef88e59 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -93,7 +93,7 @@ static u32 ndisc_hash(const void *pkey, const struct net_device *dev, - __u32 rnd); + __u32 *hash_rnd); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -126,7 +126,6 @@ static const struct neigh_ops ndisc_direct_ops = { struct neigh_table nd_tbl = { .family = AF_INET6, - .entry_size = sizeof(struct neighbour) + sizeof(struct in6_addr), .key_len = sizeof(struct in6_addr), .hash = ndisc_hash, .constructor = ndisc_constructor, @@ -141,7 +140,7 @@ struct neigh_table nd_tbl = { .gc_staletime = 60 * HZ, .reachable_time = ND_REACHABLE_TIME, .delay_probe_time = 5 * HZ, - .queue_len = 3, + .queue_len_bytes = 64*1024, .ucast_probes = 3, .mcast_probes = 3, .anycast_delay = 1 * HZ, @@ -350,16 +349,9 @@ EXPORT_SYMBOL(ndisc_mc_map); static u32 ndisc_hash(const void *pkey, const struct net_device *dev, - __u32 hash_rnd) + __u32 *hash_rnd) { - const u32 *p32 = pkey; - u32 addr_hash, i; - - addr_hash = 0; - for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++) - addr_hash ^= *p32++; - - return jhash_2words(addr_hash, dev->ifindex, hash_rnd); + return ndisc_hashfn(pkey, dev, hash_rnd); } static int ndisc_constructor(struct neighbour *neigh) @@ -370,17 +362,14 @@ static int ndisc_constructor(struct neighbour *neigh) struct neigh_parms *parms; int is_multicast = ipv6_addr_is_multicast(addr); - rcu_read_lock(); in6_dev = in6_dev_get(dev); if (in6_dev == NULL) { - rcu_read_unlock(); return -EINVAL; } parms = in6_dev->nd_parms; __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); - rcu_read_unlock(); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; if (!dev->header_ops) { @@ -449,6 +438,8 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, struct sock *sk = net->ipv6.ndisc_sk; struct sk_buff *skb; struct icmp6hdr *hdr; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; int len; int err; u8 *opt; @@ -462,7 +453,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, skb = sock_alloc_send_skb(sk, (MAX_HEADER + sizeof(struct ipv6hdr) + - len + LL_ALLOCATED_SPACE(dev)), + len + hlen + tlen), 1, &err); if (!skb) { ND_PRINTK0(KERN_ERR @@ -471,7 +462,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, return NULL; } - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); skb->transport_header = skb->tail; @@ -482,7 +473,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, opt = skb_transport_header(skb) + sizeof(struct icmp6hdr); if (target) { - ipv6_addr_copy((struct in6_addr *)opt, target); + *(struct in6_addr *)opt = *target; opt += sizeof(*target); } @@ -518,14 +509,7 @@ void ndisc_send_skb(struct sk_buff *skb, type = icmp6h->icmp6_type; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex); - - dst = icmp6_dst_alloc(dev, neigh, daddr); - if (!dst) { - kfree_skb(skb); - return; - } - - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + dst = icmp6_dst_alloc(dev, neigh, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); return; @@ -533,7 +517,8 @@ void ndisc_send_skb(struct sk_buff *skb, skb_dst_set(skb, dst); - idev = in6_dev_get(dst->dev); + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, @@ -543,8 +528,7 @@ void ndisc_send_skb(struct sk_buff *skb, ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); } EXPORT_SYMBOL(ndisc_send_skb); @@ -1039,7 +1023,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) if (skb->len < sizeof(*rs_msg)) return; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (!idev) { if (net_ratelimit()) ND_PRINTK1("ICMP6 RS: can't find in6 device\n"); @@ -1080,7 +1064,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) neigh_release(neigh); } out: - in6_dev_put(idev); + return; } static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) @@ -1179,7 +1163,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) * set the RA_RECV flag in the interface */ - in6_dev = in6_dev_get(skb->dev); + in6_dev = __in6_dev_get(skb->dev); if (in6_dev == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: can't find inet6 device for %s.\n", @@ -1188,7 +1172,6 @@ static void ndisc_router_discovery(struct sk_buff *skb) } if (!ndisc_parse_options(opt, optlen, &ndopts)) { - in6_dev_put(in6_dev); ND_PRINTK2(KERN_WARNING "ICMP6 RA: invalid ND options\n"); return; @@ -1225,6 +1208,9 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (!in6_dev->cnf.accept_ra_defrtr) goto skip_defrtr; + if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + goto skip_defrtr; + lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); #ifdef CONFIG_IPV6_ROUTER_PREF @@ -1238,7 +1224,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); if (rt) - neigh = dst_get_neighbour(&rt->dst); + neigh = dst_get_neighbour_noref(&rt->dst); if (rt && lifetime == 0) { neigh_clone(neigh); @@ -1255,17 +1241,15 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() failed to add default route.\n", __func__); - in6_dev_put(in6_dev); return; } - neigh = dst_get_neighbour(&rt->dst); + neigh = dst_get_neighbour_noref(&rt->dst); if (neigh == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", __func__); dst_release(&rt->dst); - in6_dev_put(in6_dev); return; } neigh->flags |= NTF_ROUTER; @@ -1274,7 +1258,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } if (rt) - rt->rt6i_expires = jiffies + (HZ * lifetime); + rt->dst.expires = jiffies + (HZ * lifetime); if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; @@ -1349,6 +1333,9 @@ skip_linkparms: goto out; #ifdef CONFIG_IPV6_ROUTE_INFO + if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + goto skip_routeinfo; + if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_ri; @@ -1366,6 +1353,8 @@ skip_linkparms: &ipv6_hdr(skb)->saddr); } } + +skip_routeinfo: #endif #ifdef CONFIG_IPV6_NDISC_NODETYPE @@ -1379,7 +1368,9 @@ skip_linkparms: for (p = ndopts.nd_opts_pi; p; p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) { - addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3); + addrconf_prefix_rcv(skb->dev, (u8 *)p, + (p->nd_opt_len) << 3, + ndopts.nd_opts_src_lladdr != NULL); } } @@ -1422,7 +1413,6 @@ out: dst_release(&rt->dst); else if (neigh) neigh_release(neigh); - in6_dev_put(in6_dev); } static void ndisc_redirect_rcv(struct sk_buff *skb) @@ -1481,13 +1471,11 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - in6_dev = in6_dev_get(skb->dev); + in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) { - in6_dev_put(in6_dev); + if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) return; - } /* RFC2461 8.1: * The IP source address of the Redirect MUST be the same as the current @@ -1497,7 +1485,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: invalid ND options\n"); - in6_dev_put(in6_dev); return; } if (ndopts.nd_opts_tgt_lladdr) { @@ -1506,7 +1493,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (!lladdr) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: invalid link-layer address length\n"); - in6_dev_put(in6_dev); return; } } @@ -1518,7 +1504,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) on_link); neigh_release(neigh); } - in6_dev_put(in6_dev); } void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, @@ -1537,6 +1522,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, struct inet6_dev *idev; struct flowi6 fl6; u8 *opt; + int hlen, tlen; int rd_len; int err; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; @@ -1575,7 +1561,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, } if (!rt->rt6i_peer) rt6_bind_peer(rt, 1); - if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) + if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) goto release; if (dev->addr_len) { @@ -1594,9 +1580,11 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, rd_len &= ~0x7; len += rd_len; + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; buff = sock_alloc_send_skb(sk, (MAX_HEADER + sizeof(struct ipv6hdr) + - len + LL_ALLOCATED_SPACE(dev)), + len + hlen + tlen), 1, &err); if (buff == NULL) { ND_PRINTK0(KERN_ERR @@ -1605,7 +1593,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, goto release; } - skb_reserve(buff, LL_RESERVED_SPACE(dev)); + skb_reserve(buff, hlen); ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, IPPROTO_ICMPV6, len); @@ -1621,9 +1609,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, */ addrp = (struct in6_addr *)(icmph + 1); - ipv6_addr_copy(addrp, target); + *addrp = *target; addrp++; - ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr); + *addrp = ipv6_hdr(skb)->daddr; opt = (u8*) (addrp + 1); @@ -1651,7 +1639,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, csum_partial(icmph, len, 0)); skb_dst_set(buff, dst); - idev = in6_dev_get(dst->dev); + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, dst_output); @@ -1660,8 +1649,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; release: diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 30fcee465448..db31561cc8df 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -3,6 +3,7 @@ #include <linux/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> +#include <linux/export.h> #include <net/dst.h> #include <net/ipv6.h> #include <net/ip6_route.h> @@ -100,9 +101,16 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst, .pinet6 = (struct ipv6_pinfo *) &fake_pinfo, }; const void *sk = strict ? &fake_sk : NULL; - - *dst = ip6_route_output(net, sk, &fl->u.ip6); - return (*dst)->error; + struct dst_entry *result; + int err; + + result = ip6_route_output(net, sk, &fl->u.ip6); + err = result->error; + if (err) + dst_release(result); + else + *dst = result; + return err; } __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 448464844a25..9a68fb5b9e77 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -125,6 +125,16 @@ config IP6_NF_MATCH_MH To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_RPFILTER + tristate '"rpfilter" reverse path filter match support' + depends on NETFILTER_ADVANCED + ---help--- + This option allows you to match packets whose replies would + go out via the interface the packet came in. + + To compile it as a module, choose M here. If unsure, say N. + The module will be called ip6t_rpfilter. + config IP6_NF_MATCH_RT tristate '"rt" Routing header match support' depends on NETFILTER_ADVANCED @@ -186,7 +196,6 @@ config IP6_NF_MANGLE config IP6_NF_RAW tristate 'raw table support (required for TRACE)' - depends on NETFILTER_ADVANCED help This option adds a `raw' table to ip6tables. This table is the very first in the netfilter framework and hooks in at the PREROUTING diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index abfee91ce816..2eaed96db02c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o +obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 5e5ce778be7f..a34c9e4c792c 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -405,6 +405,7 @@ __ipq_rcv_skb(struct sk_buff *skb) int status, type, pid, flags; unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; + bool enable_timestamp = false; skblen = skb->len; if (skblen < sizeof(*nlh)) @@ -442,11 +443,13 @@ __ipq_rcv_skb(struct sk_buff *skb) RCV_SKB_FAIL(-EBUSY); } } else { - net_enable_timestamp(); + enable_timestamp = true; peer_pid = pid; } spin_unlock_bh(&queue_lock); + if (enable_timestamp) + net_enable_timestamp(); status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0)); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index a5a4c5dd5396..aad2fa41cf46 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -49,6 +49,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) const __u8 tclass = DEFAULT_TOS_VALUE; struct dst_entry *dst = NULL; u8 proto; + __be16 frag_off; struct flowi6 fl6; if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || @@ -58,7 +59,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) } proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto); + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { pr_debug("Cannot get TCP header.\n"); @@ -93,8 +94,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl6.saddr, &oip6h->daddr); - ipv6_addr_copy(&fl6.daddr, &oip6h->saddr); + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; fl6.fl6_sport = otcph.dest; fl6.fl6_dport = otcph.source; security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); @@ -129,8 +130,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; - ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); - ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); + ip6h->saddr = oip6h->daddr; + ip6h->daddr = oip6h->saddr; tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c new file mode 100644 index 000000000000..5d1d8b04d694 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011 Florian Westphal <fw@strlen.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/route.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> + +#include <linux/netfilter/xt_rpfilter.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("Xtables: IPv6 reverse path filter match"); + +static bool rpfilter_addr_unicast(const struct in6_addr *addr) +{ + int addr_type = ipv6_addr_type(addr); + return addr_type & IPV6_ADDR_UNICAST; +} + +static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, + const struct net_device *dev, u8 flags) +{ + struct rt6_info *rt; + struct ipv6hdr *iph = ipv6_hdr(skb); + bool ret = false; + struct flowi6 fl6 = { + .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, + .flowi6_proto = iph->nexthdr, + .daddr = iph->saddr, + }; + int lookup_flags; + + if (rpfilter_addr_unicast(&iph->daddr)) { + memcpy(&fl6.saddr, &iph->daddr, sizeof(struct in6_addr)); + lookup_flags = RT6_LOOKUP_F_HAS_SADDR; + } else { + lookup_flags = 0; + } + + fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; + if ((flags & XT_RPFILTER_LOOSE) == 0) { + fl6.flowi6_oif = dev->ifindex; + lookup_flags |= RT6_LOOKUP_F_IFACE; + } + + rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags); + if (rt->dst.error) + goto out; + + if (rt->rt6i_flags & (RTF_REJECT|RTF_ANYCAST)) + goto out; + + if (rt->rt6i_flags & RTF_LOCAL) { + ret = flags & XT_RPFILTER_ACCEPT_LOCAL; + goto out; + } + + if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) + ret = true; + out: + dst_release(&rt->dst); + return ret; +} + +static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_rpfilter_info *info = par->matchinfo; + int saddrtype; + struct ipv6hdr *iph; + bool invert = info->flags & XT_RPFILTER_INVERT; + + if (par->in->flags & IFF_LOOPBACK) + return true ^ invert; + + iph = ipv6_hdr(skb); + saddrtype = ipv6_addr_type(&iph->saddr); + if (unlikely(saddrtype == IPV6_ADDR_ANY)) + return true ^ invert; /* not routable: forward path will drop it */ + + return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert; +} + +static int rpfilter_check(const struct xt_mtchk_param *par) +{ + const struct xt_rpfilter_info *info = par->matchinfo; + unsigned int options = ~XT_RPFILTER_OPTION_MASK; + + if (info->flags & options) { + pr_info("unknown options encountered"); + return -EINVAL; + } + + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "raw") != 0) { + pr_info("match only valid in the \'raw\' " + "or \'mangle\' tables, not \'%s\'.\n", par->table); + return -EINVAL; + } + + return 0; +} + +static struct xt_match rpfilter_mt_reg __read_mostly = { + .name = "rpfilter", + .family = NFPROTO_IPV6, + .checkentry = rpfilter_check, + .match = rpfilter_mt, + .matchsize = sizeof(struct xt_rpfilter_info), + .hooks = (1 << NF_INET_PRE_ROUTING), + .me = THIS_MODULE +}; + +static int __init rpfilter_mt_init(void) +{ + return xt_register_match(&rpfilter_mt_reg); +} + +static void __exit rpfilter_mt_exit(void) +{ + xt_unregister_match(&rpfilter_mt_reg); +} + +module_init(rpfilter_mt_init); +module_exit(rpfilter_mt_exit); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index c9e37c8fd62c..a8f6da97e3b2 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -44,7 +44,7 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ -static int forward = NF_ACCEPT; +static bool forward = NF_ACCEPT; module_param(forward, bool, 0000); static int __net_init ip6table_filter_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 085727263812..38f00b0298d3 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -182,7 +182,6 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) return container_of(q, struct nf_ct_frag6_queue, q); oom: - pr_debug("Can't alloc new queue\n"); return NULL; } @@ -370,16 +369,16 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) struct sk_buff *clone; int i, plen = 0; - if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) { - pr_debug("Can't alloc skb\n"); + clone = alloc_skb(0, GFP_ATOMIC); + if (clone == NULL) goto out_oom; - } + clone->next = head->next; head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; i<skb_shinfo(head)->nr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 18ff5df7ec02..fdeb6d03da81 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -21,6 +21,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stddef.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/sock.h> @@ -141,11 +142,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_SENTINEL }; -/* can be called either with percpu mib (pcpumib != NULL), - * or shared one (smib != NULL) - */ -static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **pcpumib, - atomic_long_t *smib) +static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib) { char name[32]; int i; @@ -162,14 +159,14 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **pcpum snprintf(name, sizeof(name), "Icmp6%s%s", i & 0x100 ? "Out" : "In", p); seq_printf(seq, "%-32s\t%lu\n", name, - pcpumib ? snmp_fold_field(pcpumib, i) : atomic_long_read(smib + i)); + atomic_long_read(smib + i)); } /* print by number (nonzero only) - ICMPMsgStat format */ for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { unsigned long val; - val = pcpumib ? snmp_fold_field(pcpumib, i) : atomic_long_read(smib + i); + val = atomic_long_read(smib + i); if (!val) continue; snprintf(name, sizeof(name), "Icmp6%sType%u", @@ -214,8 +211,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, NULL, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, - (void __percpu **)net->mib.icmpv6msg_statistics, NULL); + snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs); snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6, NULL, snmp6_udp6_list); snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6, @@ -245,7 +241,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v) snmp6_ipstats_list); snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, NULL, idev->stats.icmpv6msgdev->mibs); + snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs); return 0; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 343852e5c703..d02f7e4dd611 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -61,6 +61,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/export.h> static struct raw_hashinfo raw_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), @@ -137,7 +138,7 @@ EXPORT_SYMBOL(rawv6_mh_filter_register); int rawv6_mh_filter_unregister(mh_filter_t filter) { - rcu_assign_pointer(mh_filter, NULL); + RCU_INIT_POINTER(mh_filter, NULL); synchronize_rcu(); return 0; } @@ -298,9 +299,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); + np->rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) - ipv6_addr_copy(&np->saddr, &addr->sin6_addr); + np->saddr = addr->sin6_addr; err = 0; out_unlock: rcu_read_unlock(); @@ -372,9 +373,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, read_unlock(&raw_v6_hashinfo.lock); } -static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) +static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) { - if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) && + if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); kfree_skb(skb); @@ -382,7 +383,8 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) } /* Charge it to the socket. */ - if (ip_queue_rcv_skb(sk, skb) < 0) { + skb_dst_drop(skb); + if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } @@ -493,7 +495,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (sin6) { sin6->sin6_family = AF_INET6; sin6->sin6_port = 0; - ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr); + sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) @@ -542,8 +544,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, goto out; offset = rp->offset; - total_len = inet_sk(sk)->cork.base.length - (skb_network_header(skb) - - skb->data); + total_len = inet_sk(sk)->cork.base.length; if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); @@ -610,6 +611,8 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct sk_buff *skb; int err; struct rt6_info *rt = (struct rt6_info *)*dstp; + int hlen = LL_RESERVED_SPACE(rt->dst.dev); + int tlen = rt->dst.dev->needed_tailroom; if (length > rt->dst.dev->mtu) { ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); @@ -619,11 +622,11 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, + length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); + skb_reserve(skb, hlen); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -843,11 +846,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, goto out; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl6.daddr, daddr); + fl6.daddr = *daddr; else fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.saddr = np->saddr; final_p = fl6_update_dst(&fl6, opt, &final); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 7b954e2539d0..b69fae76a6f1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -42,6 +42,7 @@ #include <linux/jhash.h> #include <linux/skbuff.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/sock.h> #include <net/snmp.h> @@ -152,8 +153,8 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) fq->id = arg->id; fq->user = arg->user; - ipv6_addr_copy(&fq->saddr, arg->src); - ipv6_addr_copy(&fq->daddr, arg->dst); + fq->saddr = *arg->src; + fq->daddr = *arg->dst; } EXPORT_SYMBOL(ip6_frag_init); @@ -464,8 +465,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; i<skb_shinfo(head)->nr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fb545edef6ea..07361dfa8085 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -26,6 +26,7 @@ #include <linux/capability.h> #include <linux/errno.h> +#include <linux/export.h> #include <linux/types.h> #include <linux/times.h> #include <linux/socket.h> @@ -61,22 +62,11 @@ #include <linux/sysctl.h> #endif -/* Set to 3 to get tracing. */ -#define RT6_DEBUG 2 - -#if RT6_DEBUG >= 3 -#define RDBG(x) printk x -#define RT6_TRACE(x...) printk(KERN_DEBUG x) -#else -#define RDBG(x) -#define RT6_TRACE(x...) do { ; } while (0) -#endif - static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); -static unsigned int ip6_default_mtu(const struct dst_entry *dst); +static unsigned int ip6_mtu(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -133,7 +123,23 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev); + struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); + if (n) + return n; + return neigh_create(&nd_tbl, daddr, dst->dev); +} + +static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) +{ + struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway); + if (!n) { + n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev); + if (IS_ERR(n)) + return PTR_ERR(n); + } + dst_set_neighbour(&rt->dst, n); + + return 0; } static struct dst_ops ip6_dst_ops_template = { @@ -143,7 +149,7 @@ static struct dst_ops ip6_dst_ops_template = { .gc_thresh = 1024, .check = ip6_dst_check, .default_advmss = ip6_default_advmss, - .default_mtu = ip6_default_mtu, + .mtu = ip6_mtu, .cow_metrics = ipv6_cow_metrics, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, @@ -154,9 +160,11 @@ static struct dst_ops ip6_dst_ops_template = { .neigh_lookup = ip6_neigh_lookup, }; -static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) +static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) { - return 0; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -174,7 +182,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, - .default_mtu = ip6_blackhole_default_mtu, + .mtu = ip6_blackhole_mtu, .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, .cow_metrics = ip6_rt_blackhole_cow_metrics, @@ -244,9 +252,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, { struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); - if (rt != NULL) + if (rt) memset(&rt->rt6i_table, 0, - sizeof(*rt) - sizeof(struct dst_entry)); + sizeof(*rt) - sizeof(struct dst_entry)); return rt; } @@ -260,7 +268,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); - if (idev != NULL) { + if (idev) { rt->rt6i_idev = NULL; in6_dev_put(idev); } @@ -296,10 +304,10 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct net_device *loopback_dev = dev_net(dev)->loopback_dev; - if (dev != loopback_dev && idev != NULL && idev->dev == dev) { + if (dev != loopback_dev && idev && idev->dev == dev) { struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); - if (loopback_idev != NULL) { + if (loopback_idev) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); } @@ -309,7 +317,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static __inline__ int rt6_check_expired(const struct rt6_info *rt) { return (rt->rt6i_flags & RTF_EXPIRES) && - time_after(jiffies, rt->rt6i_expires); + time_after(jiffies, rt->dst.expires); } static inline int rt6_need_strict(const struct in6_addr *daddr) @@ -335,13 +343,13 @@ static inline struct rt6_info *rt6_device_match(struct net *net, goto out; for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { - struct net_device *dev = sprt->rt6i_dev; + struct net_device *dev = sprt->dst.dev; if (oif) { if (dev->ifindex == oif) return sprt; if (dev->flags & IFF_LOOPBACK) { - if (sprt->rt6i_idev == NULL || + if (!sprt->rt6i_idev || sprt->rt6i_idev->dev->ifindex != oif) { if (flags & RT6_LOOKUP_F_IFACE && oif) continue; @@ -382,7 +390,7 @@ static void rt6_probe(struct rt6_info *rt) * to no more than one per minute. */ rcu_read_lock(); - neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; + neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) goto out; read_lock_bh(&neigh->lock); @@ -396,7 +404,7 @@ static void rt6_probe(struct rt6_info *rt) target = (struct in6_addr *)&neigh->primary_key; addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); + ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); } else { read_unlock_bh(&neigh->lock); } @@ -414,7 +422,7 @@ static inline void rt6_probe(struct rt6_info *rt) */ static inline int rt6_check_dev(struct rt6_info *rt, int oif) { - struct net_device *dev = rt->rt6i_dev; + struct net_device *dev = rt->dst.dev; if (!oif || dev->ifindex == oif) return 2; if ((dev->flags & IFF_LOOPBACK) && @@ -429,7 +437,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) int m; rcu_read_lock(); - neigh = dst_get_neighbour(&rt->dst); + neigh = dst_get_neighbour_noref(&rt->dst); if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -515,9 +523,6 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) struct rt6_info *match, *rt0; struct net *net; - RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", - __func__, fn->leaf, oif); - rt0 = fn->rr_ptr; if (!rt0) fn->rr_ptr = rt0 = fn->leaf; @@ -536,10 +541,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) fn->rr_ptr = next; } - RT6_TRACE("%s() => %p\n", - __func__, match); - - net = dev_net(rt0->rt6i_dev); + net = dev_net(rt0->dst.dev); return match ? match : net->ipv6.ip6_null_entry; } @@ -608,7 +610,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (!addrconf_finite_timeout(lifetime)) { rt->rt6i_flags &= ~RTF_EXPIRES; } else { - rt->rt6i_expires = jiffies + HZ * lifetime; + rt->dst.expires = jiffies + HZ * lifetime; rt->rt6i_flags |= RTF_EXPIRES; } dst_release(&rt->dst); @@ -633,7 +635,7 @@ do { \ goto restart; \ } \ } \ -} while(0) +} while (0) static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, @@ -655,6 +657,13 @@ out: } +struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6, + int flags) +{ + return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); +} +EXPORT_SYMBOL_GPL(ip6_route_lookup); + struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int strict) { @@ -703,7 +712,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = dev_net(rt->rt6i_dev), + .nl_net = dev_net(rt->dst.dev), }; return __ip6_ins_rt(rt, &info); } @@ -721,29 +730,27 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, rt = ip6_rt_copy(ort, daddr); if (rt) { - struct neighbour *neigh; int attempts = !in_softirq(); - if (!(rt->rt6i_flags&RTF_GATEWAY)) { - if (rt->rt6i_dst.plen != 128 && + if (!(rt->rt6i_flags & RTF_GATEWAY)) { + if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; - ipv6_addr_copy(&rt->rt6i_gateway, daddr); + rt->rt6i_gateway = *daddr; } rt->rt6i_flags |= RTF_CACHE; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { - ipv6_addr_copy(&rt->rt6i_src.addr, saddr); + rt->rt6i_src.addr = *saddr; rt->rt6i_src.plen = 128; } #endif retry: - neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); - if (IS_ERR(neigh)) { - struct net *net = dev_net(rt->rt6i_dev); + if (rt6_bind_neighbour(rt, rt->dst.dev)) { + struct net *net = dev_net(rt->dst.dev); int saved_rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; int saved_rt_elasticity = @@ -768,8 +775,6 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, dst_free(&rt->dst); return NULL; } - dst_set_neighbour(&rt->dst, neigh); - } return rt; @@ -782,7 +787,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, if (rt) { rt->rt6i_flags |= RTF_CACHE; - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst))); } return rt; } @@ -816,7 +821,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -872,7 +877,7 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, + .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, }; @@ -929,9 +934,9 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); - rt->rt6i_expires = 0; + rt->dst.expires = 0; - ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); + rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; rt->rt6i_metric = 0; @@ -994,7 +999,7 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { - if (rt->rt6i_flags&RTF_CACHE) { + if (rt->rt6i_flags & RTF_CACHE) { dst_set_expires(&rt->dst, 0); rt->rt6i_flags |= RTF_EXPIRES; } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) @@ -1040,10 +1045,15 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) return mtu; } -static unsigned int ip6_default_mtu(const struct dst_entry *dst) +static unsigned int ip6_mtu(const struct dst_entry *dst) { - unsigned int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + if (mtu) + return mtu; + + mtu = IPV6_MIN_MTU; rcu_read_lock(); idev = __in6_dev_get(dst->dev); @@ -1059,38 +1069,41 @@ static DEFINE_SPINLOCK(icmp6_dst_lock); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct neighbour *neigh, - const struct in6_addr *addr) + struct flowi6 *fl6) { + struct dst_entry *dst; struct rt6_info *rt; struct inet6_dev *idev = in6_dev_get(dev); struct net *net = dev_net(dev); - if (unlikely(idev == NULL)) + if (unlikely(!idev)) return NULL; rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); - if (unlikely(rt == NULL)) { + if (unlikely(!rt)) { in6_dev_put(idev); + dst = ERR_PTR(-ENOMEM); goto out; } if (neigh) neigh_hold(neigh); else { - neigh = ndisc_get_neigh(dev, addr); - if (IS_ERR(neigh)) - neigh = NULL; + neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr); + if (IS_ERR(neigh)) { + dst_free(&rt->dst); + return ERR_CAST(neigh); + } } rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); - - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); + rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; rt->rt6i_idev = idev; + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); spin_lock_bh(&icmp6_dst_lock); rt->dst.next = icmp6_dst_gc_list; @@ -1099,8 +1112,10 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, fib6_force_start_gc(net); + dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); + out: - return &rt->dst; + return dst; } int icmp6_dst_gc(void) @@ -1230,21 +1245,30 @@ int ip6_route_add(struct fib6_config *cfg) if (cfg->fc_metric == 0) cfg->fc_metric = IP6_RT_PRIO_USER; - table = fib6_new_table(net, cfg->fc_table); - if (table == NULL) { - err = -ENOBUFS; - goto out; + err = -ENOBUFS; + if (cfg->fc_nlinfo.nlh && + !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { + table = fib6_get_table(net, cfg->fc_table); + if (!table) { + printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n"); + table = fib6_new_table(net, cfg->fc_table); + } + } else { + table = fib6_new_table(net, cfg->fc_table); } + if (!table) + goto out; + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); - if (rt == NULL) { + if (!rt) { err = -ENOMEM; goto out; } rt->dst.obsolete = -1; - rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? + rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ? jiffies + clock_t_to_jiffies(cfg->fc_expires) : 0; @@ -1287,8 +1311,9 @@ int ip6_route_add(struct fib6_config *cfg) they would result in kernel looping; promote them to reject routes */ if ((cfg->fc_flags & RTF_REJECT) || - (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK) - && !(cfg->fc_flags&RTF_LOCAL))) { + (dev && (dev->flags & IFF_LOOPBACK) && + !(addr_type & IPV6_ADDR_LOOPBACK) && + !(cfg->fc_flags & RTF_LOCAL))) { /* hold loopback dev/idev if we haven't done so. */ if (dev != net->loopback_dev) { if (dev) { @@ -1315,7 +1340,7 @@ int ip6_route_add(struct fib6_config *cfg) int gwa_type; gw_addr = &cfg->fc_gateway; - ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); + rt->rt6i_gateway = *gw_addr; gwa_type = ipv6_addr_type(gw_addr); if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { @@ -1329,26 +1354,26 @@ int ip6_route_add(struct fib6_config *cfg) some exceptions. --ANK */ err = -EINVAL; - if (!(gwa_type&IPV6_ADDR_UNICAST)) + if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); err = -EHOSTUNREACH; - if (grt == NULL) + if (!grt) goto out; if (dev) { - if (dev != grt->rt6i_dev) { + if (dev != grt->dst.dev) { dst_release(&grt->dst); goto out; } } else { - dev = grt->rt6i_dev; + dev = grt->dst.dev; idev = grt->rt6i_idev; dev_hold(dev); in6_dev_hold(grt->rt6i_idev); } - if (!(grt->rt6i_flags&RTF_GATEWAY)) + if (!(grt->rt6i_flags & RTF_GATEWAY)) err = 0; dst_release(&grt->dst); @@ -1356,12 +1381,12 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } err = -EINVAL; - if (dev == NULL || (dev->flags&IFF_LOOPBACK)) + if (!dev || (dev->flags & IFF_LOOPBACK)) goto out; } err = -ENODEV; - if (dev == NULL) + if (!dev) goto out; if (!ipv6_addr_any(&cfg->fc_prefsrc)) { @@ -1369,18 +1394,15 @@ int ip6_route_add(struct fib6_config *cfg) err = -EINVAL; goto out; } - ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc); + rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; rt->rt6i_prefsrc.plen = 128; } else rt->rt6i_prefsrc.plen = 0; if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(n)) { - err = PTR_ERR(n); + err = rt6_bind_neighbour(rt, dev); + if (err) goto out; - } - dst_set_neighbour(&rt->dst, n); } rt->rt6i_flags = cfg->fc_flags; @@ -1426,7 +1448,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; - struct net *net = dev_net(rt->rt6i_dev); + struct net *net = dev_net(rt->dst.dev); if (rt == net->ipv6.ip6_null_entry) return -ENOENT; @@ -1445,7 +1467,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = dev_net(rt->rt6i_dev), + .nl_net = dev_net(rt->dst.dev), }; return __ip6_del_rt(rt, &info); } @@ -1458,7 +1480,7 @@ static int ip6_route_del(struct fib6_config *cfg) int err = -ESRCH; table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); - if (table == NULL) + if (!table) return err; read_lock_bh(&table->tb6_lock); @@ -1470,8 +1492,8 @@ static int ip6_route_del(struct fib6_config *cfg) if (fn) { for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (cfg->fc_ifindex && - (rt->rt6i_dev == NULL || - rt->rt6i_dev->ifindex != cfg->fc_ifindex)) + (!rt->dst.dev || + rt->dst.dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) @@ -1533,7 +1555,7 @@ restart: continue; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (fl6->flowi6_oif != rt->rt6i_dev->ifindex) + if (fl6->flowi6_oif != rt->dst.dev->ifindex) continue; if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) continue; @@ -1566,7 +1588,7 @@ static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, }, }; - ipv6_addr_copy(&rdfl.gateway, gateway); + rdfl.gateway = *gateway; if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; @@ -1611,18 +1633,18 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == dst_get_neighbour_raw(&rt->dst)) + if (neigh == dst_get_neighbour_noref_raw(&rt->dst)) goto out; nrt = ip6_rt_copy(rt, dest); - if (nrt == NULL) + if (!nrt) goto out; nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); + nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); if (ip6_ins_rt(nrt)) @@ -1632,7 +1654,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, netevent.new = &nrt->dst; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); - if (rt->rt6i_flags&RTF_CACHE) { + if (rt->rt6i_flags & RTF_CACHE) { ip6_del_rt(rt); return; } @@ -1653,7 +1675,7 @@ static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr int allfrag = 0; again: rt = rt6_lookup(net, daddr, saddr, ifindex, 0); - if (rt == NULL) + if (!rt) return; if (rt6_check_expired(rt)) { @@ -1703,7 +1725,7 @@ again: 1. It is connected route. Action: COW 2. It is gatewayed route or NONEXTHOP route. Action: clone it. */ - if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, daddr, saddr); else nrt = rt6_alloc_clone(rt, daddr); @@ -1759,7 +1781,7 @@ void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *sad static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, const struct in6_addr *dest) { - struct net *net = dev_net(ort->rt6i_dev); + struct net *net = dev_net(ort->dst.dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, ort->dst.dev, 0); @@ -1768,7 +1790,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, rt->dst.output = ort->dst.output; rt->dst.flags |= DST_HOST; - ipv6_addr_copy(&rt->rt6i_dst.addr, dest); + rt->rt6i_dst.addr = *dest; rt->rt6i_dst.plen = 128; dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; @@ -1776,9 +1798,9 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - rt->rt6i_expires = 0; + rt->dst.expires = 0; - ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); + rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; rt->rt6i_metric = 0; @@ -1801,7 +1823,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, struct fib6_table *table; table = fib6_get_table(net, RT6_TABLE_INFO); - if (table == NULL) + if (!table) return NULL; write_lock_bh(&table->tb6_lock); @@ -1810,7 +1832,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, goto out; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_dev->ifindex != ifindex) + if (rt->dst.dev->ifindex != ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; @@ -1841,8 +1863,8 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; - ipv6_addr_copy(&cfg.fc_dst, prefix); - ipv6_addr_copy(&cfg.fc_gateway, gwaddr); + cfg.fc_dst = *prefix; + cfg.fc_gateway = *gwaddr; /* We should treat it as a default route if prefix length is 0. */ if (!prefixlen) @@ -1860,12 +1882,12 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev struct fib6_table *table; table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); - if (table == NULL) + if (!table) return NULL; write_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { - if (dev == rt->rt6i_dev && + if (dev == rt->dst.dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) break; @@ -1891,7 +1913,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_nlinfo.nl_net = dev_net(dev), }; - ipv6_addr_copy(&cfg.fc_gateway, gwaddr); + cfg.fc_gateway = *gwaddr; ip6_route_add(&cfg); @@ -1905,7 +1927,7 @@ void rt6_purge_dflt_routers(struct net *net) /* NOTE: Keep consistent with rt6_get_dflt_router */ table = fib6_get_table(net, RT6_TABLE_DFLT); - if (table == NULL) + if (!table) return; restart: @@ -1937,9 +1959,9 @@ static void rtmsg_to_fib6_config(struct net *net, cfg->fc_nlinfo.nl_net = net; - ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); - ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); - ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); + cfg->fc_dst = rtmsg->rtmsg_dst; + cfg->fc_src = rtmsg->rtmsg_src; + cfg->fc_gateway = rtmsg->rtmsg_gateway; } int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) @@ -2038,14 +2060,14 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb) struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, - int anycast) + bool anycast) { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, net->loopback_dev, 0); - struct neighbour *neigh; + int err; - if (rt == NULL) { + if (!rt) { if (net_ratelimit()) pr_warning("IPv6: Maximum number of routes reached," " consider increasing route/max_size.\n"); @@ -2065,15 +2087,13 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); - if (IS_ERR(neigh)) { + err = rt6_bind_neighbour(rt, rt->dst.dev); + if (err) { dst_free(&rt->dst); - - return ERR_CAST(neigh); + return ERR_PTR(err); } - dst_set_neighbour(&rt->dst, neigh); - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); + rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); @@ -2091,7 +2111,7 @@ int ip6_route_get_saddr(struct net *net, struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); int err = 0; if (rt->rt6i_prefsrc.plen) - ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr); + *saddr = rt->rt6i_prefsrc.addr; else err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, daddr, prefs, saddr); @@ -2111,7 +2131,7 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->rt6i_dev == dev || dev == NULL) && + if (((void *)rt->dst.dev == dev || !dev) && rt != net->ipv6.ip6_null_entry && ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { /* remove prefsrc entry */ @@ -2141,11 +2161,10 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) const struct arg_dev_net *adn = arg; const struct net_device *dev = adn->dev; - if ((rt->rt6i_dev == dev || dev == NULL) && - rt != adn->net->ipv6.ip6_null_entry) { - RT6_TRACE("deleted by ifdown %p\n", rt); + if ((rt->dst.dev == dev || !dev) && + rt != adn->net->ipv6.ip6_null_entry) return -1; - } + return 0; } @@ -2178,7 +2197,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) */ idev = __in6_dev_get(arg->dev); - if (idev == NULL) + if (!idev) return 0; /* For administrative MTU increase, there is no way to discover @@ -2195,7 +2214,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) also have the lowest MTU, TOO BIG MESSAGE will be lead to PMTU discouvery. */ - if (rt->rt6i_dev == arg->dev && + if (rt->dst.dev == arg->dev && !dst_metric_locked(&rt->dst, RTAX_MTU) && (dst_mtu(&rt->dst) >= arg->mtu || (dst_mtu(&rt->dst) < arg->mtu && @@ -2344,11 +2363,13 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 pid, u32 seq, int prefix, int nowait, unsigned int flags) { + const struct inet_peer *peer; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; u32 table; struct neighbour *n; + u32 ts, tsage; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2358,7 +2379,7 @@ static int rt6_fill_node(struct net *net, } nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); @@ -2372,25 +2393,25 @@ static int rt6_fill_node(struct net *net, table = RT6_TABLE_UNSPEC; rtm->rtm_table = table; NLA_PUT_U32(skb, RTA_TABLE, table); - if (rt->rt6i_flags&RTF_REJECT) + if (rt->rt6i_flags & RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; - else if (rt->rt6i_flags&RTF_LOCAL) + else if (rt->rt6i_flags & RTF_LOCAL) rtm->rtm_type = RTN_LOCAL; - else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) + else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) rtm->rtm_type = RTN_LOCAL; else rtm->rtm_type = RTN_UNICAST; rtm->rtm_flags = 0; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = rt->rt6i_protocol; - if (rt->rt6i_flags&RTF_DYNAMIC) + if (rt->rt6i_flags & RTF_DYNAMIC) rtm->rtm_protocol = RTPROT_REDIRECT; else if (rt->rt6i_flags & RTF_ADDRCONF) rtm->rtm_protocol = RTPROT_KERNEL; - else if (rt->rt6i_flags&RTF_DEFAULT) + else if (rt->rt6i_flags & RTF_DEFAULT) rtm->rtm_protocol = RTPROT_RA; - if (rt->rt6i_flags&RTF_CACHE) + if (rt->rt6i_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; if (dst) { @@ -2430,7 +2451,7 @@ static int rt6_fill_node(struct net *net, if (rt->rt6i_prefsrc.plen) { struct in6_addr saddr_buf; - ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr); + saddr_buf = rt->rt6i_prefsrc.addr; NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } @@ -2438,24 +2459,31 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; rcu_read_lock(); - n = dst_get_neighbour(&rt->dst); + n = dst_get_neighbour_noref(&rt->dst); if (n) NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); rcu_read_unlock(); if (rt->dst.dev) - NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); + NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); if (!(rt->rt6i_flags & RTF_EXPIRES)) expires = 0; - else if (rt->rt6i_expires - jiffies < INT_MAX) - expires = rt->rt6i_expires - jiffies; + else if (rt->dst.expires - jiffies < INT_MAX) + expires = rt->dst.expires - jiffies; else expires = INT_MAX; - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, + peer = rt->rt6i_peer; + ts = tsage = 0; + if (peer && peer->tcp_ts_stamp) { + ts = peer->tcp_ts; + tsage = get_seconds() - peer->tcp_ts_stamp; + } + + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage, expires, rt->dst.error) < 0) goto nla_put_failure; @@ -2504,14 +2532,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) goto errout; - ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC])); + fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); } if (tb[RTA_DST]) { if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) goto errout; - ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST])); + fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); } if (tb[RTA_IIF]) @@ -2530,7 +2558,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void } skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) { + if (!skb) { err = -ENOBUFS; goto errout; } @@ -2565,10 +2593,10 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) int err; err = -ENOBUFS; - seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; + seq = info->nlh ? info->nlh->nlmsg_seq : 0; skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); - if (skb == NULL) + if (!skb) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, @@ -2635,7 +2663,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) seq_puts(m, "00000000000000000000000000000000 00 "); #endif rcu_read_lock(); - n = dst_get_neighbour(&rt->dst); + n = dst_get_neighbour_noref(&rt->dst); if (n) { seq_printf(m, "%pi6", n->primary_key); } else { @@ -2645,14 +2673,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, - rt->rt6i_dev ? rt->rt6i_dev->name : ""); + rt->dst.dev ? rt->dst.dev->name : ""); return 0; } static int ipv6_route_show(struct seq_file *m, void *v) { struct net *net = (struct net *)m->private; - fib6_clean_all(net, rt6_info_route, 0, m); + fib6_clean_all_ro(net, rt6_info_route, 0, m); return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 00b15ac7a702..133768e52912 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -91,7 +91,7 @@ struct pcpu_tstats { unsigned long rx_bytes; unsigned long tx_packets; unsigned long tx_bytes; -}; +} __attribute__((aligned(4*sizeof(unsigned long)))); static struct net_device_stats *ipip6_get_stats(struct net_device *dev) { @@ -263,6 +263,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (register_netdevice(dev) < 0) goto failed_free; + strcpy(nt->parms.name, dev->name); + dev_hold(dev); ipip6_tunnel_link(sitn, nt); @@ -474,7 +476,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { - rcu_assign_pointer(sitn->tunnels_wc[0], NULL); + RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { ipip6_tunnel_unlink(sitn, netdev_priv(dev)); ipip6_tunnel_del_prl(netdev_priv(dev), NULL); @@ -680,7 +682,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = dst_get_neighbour(skb_dst(skb)); + neigh = dst_get_neighbour_noref(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) @@ -705,7 +707,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = dst_get_neighbour(skb_dst(skb)); + neigh = dst_get_neighbour_noref(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) @@ -914,7 +916,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; #ifdef CONFIG_IPV6_SIT_6RD } else { - ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix); + ip6rd.prefix = t->ip6rd.prefix; ip6rd.relay_prefix = t->ip6rd.relay_prefix; ip6rd.prefixlen = t->ip6rd.prefixlen; ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; @@ -1082,7 +1084,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if (relay_prefix != ip6rd.relay_prefix) goto done; - ipv6_addr_copy(&t->ip6rd.prefix, &prefix); + t->ip6rd.prefix = prefix; t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.prefixlen = ip6rd.prefixlen; t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen; @@ -1144,7 +1146,6 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -1207,6 +1208,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea static int __net_init sit_init_net(struct net *net) { struct sit_net *sitn = net_generic(net, sit_net_id); + struct ip_tunnel *t; int err; sitn->tunnels[0] = sitn->tunnels_wc; @@ -1231,6 +1233,9 @@ static int __net_init sit_init_net(struct net *net) if ((err = register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; + t = netdev_priv(sitn->fb_tunnel_dev); + + strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ac838965ff34..8e951d8d3b81 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -115,7 +115,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr, & COOKIEMASK; } -__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); @@ -137,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) jiffies / (HZ * 60), mssind); } -static inline int cookie_check(struct sk_buff *skb, __u32 cookie) +static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); @@ -152,7 +152,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; - u8 *hash_location; + const u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -200,8 +200,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->mss = mss; ireq->rmt_port = th->source; ireq->loc_port = th->dest; - ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); + ireq6->rmt_addr = ipv6_hdr(skb)->saddr; + ireq6->loc_addr = ipv6_hdr(skb)->daddr; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -237,9 +237,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + fl6.daddr = ireq6->rmt_addr; final_p = fl6_update_dst(&fl6, np->opt, &final); - ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.saddr = ireq6->loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 6dcf5e7d661b..166a57c47d39 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -10,6 +10,7 @@ #include <linux/in6.h> #include <linux/ipv6.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/ndisc.h> #include <net/ipv6.h> #include <net/addrconf.h> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7b8fc5794352..906c7ca43542 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -62,6 +62,7 @@ #include <net/netdma.h> #include <net/inet_common.h> #include <net/secure_seq.h> +#include <net/tcp_memcontrol.h> #include <asm/uaccess.h> @@ -114,7 +115,7 @@ static __inline__ __sum16 tcp_v6_check(int len, return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); } -static __u32 tcp_v6_init_sequence(struct sk_buff *skb) +static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, @@ -153,7 +154,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); + usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } @@ -195,7 +196,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->write_seq = 0; } - ipv6_addr_copy(&np->daddr, &usin->sin6_addr); + np->daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* @@ -244,9 +245,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, saddr = &np->rcv_saddr; fl6.flowi6_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl6.daddr, &np->daddr); - ipv6_addr_copy(&fl6.saddr, - (saddr ? saddr : &np->saddr)); + fl6.daddr = np->daddr; + fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; @@ -264,11 +264,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (saddr == NULL) { saddr = &fl6.saddr; - ipv6_addr_copy(&np->rcv_saddr, saddr); + np->rcv_saddr = *saddr; } /* set the source address */ - ipv6_addr_copy(&np->saddr, saddr); + np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; @@ -398,8 +398,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, */ memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl6.daddr, &np->daddr); - ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.daddr = np->daddr; + fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; @@ -489,8 +489,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); - ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); + fl6.daddr = treq->rmt_addr; + fl6.saddr = treq->loc_addr; fl6.flowlabel = 0; fl6.flowi6_oif = treq->iif; fl6.flowi6_mark = sk->sk_mark; @@ -512,8 +512,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); - ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); - err = ip6_xmit(sk, skb, &fl6, opt); + fl6.daddr = treq->rmt_addr; + err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); err = net_xmit_eval(err); } @@ -617,8 +617,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, tp->md5sig_info->alloced6++; } - ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr, - peer); + tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr = *peer; tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey; tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen; @@ -750,8 +749,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, bp = &hp->md5_blk.ip6; /* 1. TCP pseudo-header (RFC2460) */ - ipv6_addr_copy(&bp->saddr, saddr); - ipv6_addr_copy(&bp->daddr, daddr); + bp->saddr = *saddr; + bp->daddr = *daddr; bp->protocol = cpu_to_be32(IPPROTO_TCP); bp->len = cpu_to_be32(nbytes); @@ -761,7 +760,7 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, - struct tcphdr *th) + const struct tcphdr *th) { struct tcp_md5sig_pool *hp; struct hash_desc *desc; @@ -793,13 +792,14 @@ clear_hash_noput: } static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, - struct sock *sk, struct request_sock *req, - struct sk_buff *skb) + const struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb) { const struct in6_addr *saddr, *daddr; struct tcp_md5sig_pool *hp; struct hash_desc *desc; - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); if (sk) { saddr = &inet6_sk(sk)->saddr; @@ -842,12 +842,12 @@ clear_hash_noput: return 1; } -static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) +static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) { - __u8 *hash_location = NULL; + const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); int genhash; u8 newhash[16]; @@ -978,9 +978,10 @@ static int tcp6_gro_complete(struct sk_buff *skb) } static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 ts, struct tcp_md5sig_key *key, int rst) + u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) { - struct tcphdr *th = tcp_hdr(skb), *t1; + const struct tcphdr *th = tcp_hdr(skb); + struct tcphdr *t1; struct sk_buff *buff; struct flowi6 fl6; struct net *net = dev_net(skb_dst(skb)->dev); @@ -1037,8 +1038,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, #endif memset(&fl6, 0, sizeof(fl6)); - ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr); + fl6.daddr = ipv6_hdr(skb)->saddr; + fl6.saddr = ipv6_hdr(skb)->daddr; buff->ip_summed = CHECKSUM_PARTIAL; buff->csum = 0; @@ -1058,7 +1059,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl6, NULL); + ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); @@ -1070,7 +1071,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); u32 seq = 0, ack_seq = 0; struct tcp_md5sig_key *key = NULL; @@ -1091,13 +1092,13 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1); + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); } static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, - struct tcp_md5sig_key *key) + struct tcp_md5sig_key *key, u8 tclass) { - tcp_v6_send_response(skb, seq, ack, win, ts, key, 0); + tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -1107,7 +1108,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw)); + tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), + tw->tw_tclass); inet_twsk_put(tw); } @@ -1116,7 +1118,7 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr)); + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); } @@ -1160,7 +1162,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -1247,11 +1249,18 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); treq = inet6_rsk(req); - ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); + treq->rmt_addr = ipv6_hdr(skb)->saddr; + treq->loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); + treq->iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) + treq->iif = inet6_iif(skb); + if (!isn) { struct inet_peer *peer = NULL; @@ -1261,12 +1270,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) atomic_inc(&skb->users); treq->pktopts = skb; } - treq->iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); @@ -1377,7 +1380,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); + newnp->rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -1441,9 +1444,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr); - ipv6_addr_copy(&newnp->saddr, &treq->loc_addr); - ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr); + newnp->daddr = treq->rmt_addr; + newnp->saddr = treq->loc_addr; + newnp->rcv_saddr = treq->loc_addr; newsk->sk_bound_dev_if = treq->iif; /* Now IPv6 options... @@ -1608,7 +1611,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1630,7 +1633,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { - sock_rps_save_rxhash(nsk, skb->rxhash); + sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1638,7 +1641,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } } else - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; @@ -1688,7 +1691,7 @@ ipv6_pktoptions: static int tcp_v6_rcv(struct sk_buff *skb) { - struct tcphdr *th; + const struct tcphdr *th; const struct ipv6hdr *hdr; struct sock *sk; int ret; @@ -1722,7 +1725,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr); + TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); @@ -1856,8 +1859,8 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) static void *tcp_v6_tw_get_peer(struct sock *sk) { - struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - struct inet_timewait_sock *tw = inet_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); + const struct inet_timewait_sock *tw = inet_twsk(sk); if (tw->tw_family == AF_INET) return tcp_v4_tw_get_peer(sk); @@ -1992,7 +1995,8 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_rcvbuf = sysctl_tcp_rmem[1]; local_bh_disable(); - percpu_counter_inc(&tcp_sockets_allocated); + sock_update_memcg(sk); + sk_sockets_allocated_inc(sk); local_bh_enable(); return 0; @@ -2012,7 +2016,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - struct sock *sk, struct request_sock *req, int i, int uid) + const struct sock *sk, struct request_sock *req, int i, int uid) { int ttd = req->expires - jiffies; const struct in6_addr *src = &inet6_rsk(req)->loc_addr; @@ -2048,10 +2052,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) __u16 destp, srcp; int timer_active; unsigned long timer_expires; - struct inet_sock *inet = inet_sk(sp); - struct tcp_sock *tp = tcp_sk(sp); + const struct inet_sock *inet = inet_sk(sp); + const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); + const struct ipv6_pinfo *np = inet6_sk(sp); dest = &np->daddr; src = &np->rcv_saddr; @@ -2103,7 +2107,7 @@ static void get_timewait6_sock(struct seq_file *seq, { const struct in6_addr *dest, *src; __u16 destp, srcp; - struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); + const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) @@ -2158,12 +2162,18 @@ out: return 0; } +static const struct file_operations tcp6_afinfo_seq_fops = { + .owner = THIS_MODULE, + .open = tcp_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net +}; + static struct tcp_seq_afinfo tcp6_seq_afinfo = { .name = "tcp6", .family = AF_INET6, - .seq_fops = { - .owner = THIS_MODULE, - }, + .seq_fops = &tcp6_afinfo_seq_fops, .seq_ops = { .show = tcp6_seq_show, }, @@ -2205,7 +2215,6 @@ struct proto tcpv6_prot = { .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .orphan_count = &tcp_orphan_count, - .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, @@ -2219,6 +2228,9 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM + .proto_cgroup = tcp_proto_cgroup, +#endif }; static const struct inet6_protocol tcpv6_protocol = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bb95e8e1c6f9..4f96b5c63685 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -238,7 +238,7 @@ exact_match: return result; } -static struct sock *__udp6_lib_lookup(struct net *net, +struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, struct udp_table *udptable) @@ -305,6 +305,7 @@ begin: rcu_read_unlock(); return result; } +EXPORT_SYMBOL_GPL(__udp6_lib_lookup); static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, @@ -340,7 +341,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - unsigned int ulen; + unsigned int ulen, copied; int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -363,9 +364,10 @@ try_again: goto out; ulen = skb->len - sizeof(struct udphdr); - if (len > ulen) - len = ulen; - else if (len < ulen) + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); @@ -376,14 +378,14 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov,len); + msg->msg_iov, copied ); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) @@ -417,8 +419,7 @@ try_again: ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin6->sin6_addr); else { - ipv6_addr_copy(&sin6->sin6_addr, - &ipv6_hdr(skb)->saddr); + sin6->sin6_addr = ipv6_hdr(skb)->saddr; if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6->sin6_scope_id = IP6CB(skb)->iif; } @@ -432,7 +433,7 @@ try_again: datagram_recv_ctl(sk, msg, skb); } - err = len; + err = copied; if (flags & MSG_TRUNC) err = ulen; @@ -509,7 +510,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) int is_udplite = IS_UDPLITE(sk); if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; @@ -533,12 +534,14 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) } } - if (rcu_dereference_raw(sk->sk_filter)) { + if (rcu_access_pointer(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) goto drop; } - if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) { + skb_dst_drop(skb); + rc = sock_queue_rcv_skb(sk, skb); + if (rc < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) UDP6_INC_STATS_BH(sock_net(sk), @@ -1113,11 +1116,11 @@ do_udp_sendmsg: fl6.flowi6_proto = sk->sk_protocol; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl6.daddr, daddr); + fl6.daddr = *daddr; else fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.saddr = np->saddr; fl6.fl6_sport = inet->inet_sport; final_p = fl6_update_dst(&fl6, opt, &final); @@ -1298,7 +1301,8 @@ static int udp6_ufo_send_check(struct sk_buff *skb) return 0; } -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features) +static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; @@ -1424,13 +1428,19 @@ int udp6_seq_show(struct seq_file *seq, void *v) return 0; } +static const struct file_operations udp6_afinfo_seq_fops = { + .owner = THIS_MODULE, + .open = udp_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net +}; + static struct udp_seq_afinfo udp6_seq_afinfo = { .name = "udp6", .family = AF_INET6, .udp_table = &udp_table, - .seq_fops = { - .owner = THIS_MODULE, - }, + .seq_fops = &udp6_afinfo_seq_fops, .seq_ops = { .show = udp6_seq_show, }, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 986c4de5292e..1d08e21d9f69 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -11,6 +11,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/export.h> #include "udp_impl.h" static int udplitev6_rcv(struct sk_buff *skb) @@ -93,13 +94,20 @@ void udplitev6_exit(void) } #ifdef CONFIG_PROC_FS + +static const struct file_operations udplite6_afinfo_seq_fops = { + .owner = THIS_MODULE, + .open = udp_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net +}; + static struct udp_seq_afinfo udplite6_seq_afinfo = { .name = "udplite6", .family = AF_INET6, .udp_table = &udplite_table, - .seq_fops = { - .owner = THIS_MODULE, - }, + .seq_fops = &udplite6_afinfo_seq_fops, .seq_ops = { .show = udp6_seq_show, }, diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 3437d7d4eed6..a81ce9450750 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -72,8 +72,8 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->nexthdr = IPPROTO_BEETPH; } - ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); - ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); + top_iph->saddr = *(struct in6_addr *)&x->props.saddr; + top_iph->daddr = *(struct in6_addr *)&x->id.daddr; return 0; } @@ -99,8 +99,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(skb->len - size); - ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6); - ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6); + ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6; + ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6; err = 0; out: return err; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 4d6edff0498f..261e6e6f487e 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -55,8 +55,8 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); top_iph->hop_limit = ip6_dst_hoplimit(dst->child); - ipv6_addr_copy(&top_iph->saddr, (const struct in6_addr *)&x->props.saddr); - ipv6_addr_copy(&top_iph->daddr, (const struct in6_addr *)&x->id.daddr); + top_iph->saddr = *(struct in6_addr *)&x->props.saddr; + top_iph->daddr = *(struct in6_addr *)&x->id.daddr; return 0; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 49a91c5f5623..4eeff89c1aaa 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -28,6 +28,43 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, EXPORT_SYMBOL(xfrm6_find_1stfragopt); +static int xfrm6_local_dontfrag(struct sk_buff *skb) +{ + int proto; + struct sock *sk = skb->sk; + + if (sk) { + proto = sk->sk_protocol; + + if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) + return inet6_sk(sk)->dontfrag; + } + + return 0; +} + +static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) +{ + struct flowi6 fl6; + struct sock *sk = skb->sk; + + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.daddr = ipv6_hdr(skb)->daddr; + + ipv6_local_rxpmtu(sk, &fl6, mtu); +} + +static void xfrm6_local_error(struct sk_buff *skb, u32 mtu) +{ + struct flowi6 fl6; + struct sock *sk = skb->sk; + + fl6.fl6_dport = inet_sk(sk)->inet_dport; + fl6.daddr = ipv6_hdr(skb)->daddr; + + ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); +} + static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; @@ -39,7 +76,13 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (!skb->local_df && skb->len > mtu) { skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + + if (xfrm6_local_dontfrag(skb)) + xfrm6_local_rxpmtu(skb, mtu); + else if (skb->sk) + xfrm6_local_error(skb, mtu); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ret = -EMSGSIZE; } @@ -93,9 +136,18 @@ static int __xfrm6_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; + int mtu = ip6_skb_dst_mtu(skb); + + if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { + xfrm6_local_rxpmtu(skb, mtu); + return -EMSGSIZE; + } else if (!skb->local_df && skb->len > mtu && skb->sk) { + xfrm6_local_error(skb, mtu); + return -EMSGSIZE; + } if ((x && x->props.mode == XFRM_MODE_TUNNEL) && - ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d879f7efbd10..8ea65e032733 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -132,8 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; - ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr); - ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr); + fl6->daddr = reverse ? hdr->saddr : hdr->daddr; + fl6->saddr = reverse ? hdr->daddr : hdr->saddr; while (nh + offset + 1 < skb->data || pskb_may_pull(skb, nh + offset + 1 - skb->data)) { diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 248f0b2a7ee9..3f2f7c4ab721 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -15,6 +15,7 @@ #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/netfilter_ipv6.h> +#include <linux/export.h> #include <net/dsfield.h> #include <net/ipv6.h> #include <net/addrconf.h> @@ -26,8 +27,8 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) /* Initialize temporary selector matching only * to current session. */ - ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr); - ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr); + *(struct in6_addr *)&sel->daddr = fl6->daddr; + *(struct in6_addr *)&sel->saddr = fl6->saddr; sel->dport = xfrm_flowi_dport(fl, &fl6->uli); sel->dport_mask = htons(0xffff); sel->sport = xfrm_flowi_sport(fl, &fl6->uli); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 26b5bfcf1d03..f8ba30dfecae 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -9,6 +9,7 @@ #include <linux/proc_fs.h> #include <linux/spinlock.h> #include <linux/seq_file.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/tcp_states.h> #include <net/ipx.h> diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index c24f25ab67d3..bb14c3477680 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2558,8 +2558,8 @@ bed: self->errno = 0; setup_timer(&self->watchdog, irda_discovery_timeout, (unsigned long)self); - self->watchdog.expires = jiffies + (val * HZ/1000); - add_timer(&(self->watchdog)); + mod_timer(&self->watchdog, + jiffies + msecs_to_jiffies(val)); /* Wait for IR-LMP to call us back */ __wait_event_interruptible(self->query_wait, diff --git a/net/irda/discovery.c b/net/irda/discovery.c index 36c3f037f172..b0b56a339a83 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -35,6 +35,7 @@ #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/irda/irda.h> #include <net/irda/irlmp.h> diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index b3cc8b3989a9..253695d43fd9 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -551,7 +551,7 @@ static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) */ tty->closing = 1; if (self->closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent(tty, self->closing_wait); + tty_wait_until_sent_from_close(tty, self->closing_wait); ircomm_tty_shutdown(self); diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 3eca35faf2a8..14653b8d664d 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -42,6 +42,7 @@ #include <linux/kmod.h> #include <linux/spinlock.h> #include <linux/slab.h> +#include <linux/export.h> #include <asm/ioctls.h> #include <asm/uaccess.h> diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 779117636270..579617cca125 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -67,7 +67,7 @@ static void *ckey; static void *skey; /* Module parameters */ -static int eth; /* Use "eth" or "irlan" name for devices */ +static bool eth; /* Use "eth" or "irlan" name for devices */ static int access = ACCESS_PEER; /* PEER, DIRECT or HOSTED */ #ifdef CONFIG_PROC_FS diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index e8d5f4405d68..d14152e866d9 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -50,7 +50,7 @@ static const struct net_device_ops irlan_eth_netdev_ops = { .ndo_open = irlan_eth_open, .ndo_stop = irlan_eth_close, .ndo_start_xmit = irlan_eth_xmit, - .ndo_set_multicast_list = irlan_eth_set_multicast_list, + .ndo_set_rx_mode = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 285ccd623ae5..5c93f2952b08 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -29,6 +29,7 @@ #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/export.h> #include <asm/byteorder.h> #include <asm/unaligned.h> @@ -1460,14 +1461,12 @@ struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance) } /* Allocate a new instance */ - new = kmalloc(sizeof(struct tsap_cb), GFP_ATOMIC); + new = kmemdup(orig, sizeof(struct tsap_cb), GFP_ATOMIC); if (!new) { IRDA_DEBUG(0, "%s(), unable to kmalloc\n", __func__); spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags); return NULL; } - /* Dup */ - memcpy(new, orig, sizeof(struct tsap_cb)); spin_lock_init(&new->lock); /* We don't need the old instance any more */ diff --git a/net/irda/qos.c b/net/irda/qos.c index 4369f7f41bcb..798ffd9a705e 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -30,6 +30,8 @@ * ********************************************************************/ +#include <linux/export.h> + #include <asm/byteorder.h> #include <net/irda/irda.h> diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig index 16ce9cd4f39e..497fbe732def 100644 --- a/net/iucv/Kconfig +++ b/net/iucv/Kconfig @@ -1,15 +1,17 @@ config IUCV - tristate "IUCV support (S390 - z/VM only)" depends on S390 + def_tristate y if S390 + prompt "IUCV support (S390 - z/VM only)" help Select this option if you want to use inter-user communication under VM or VIF. If you run on z/VM, say "Y" to enable a fast communication link between VM guests. config AFIUCV - tristate "AF_IUCV support (S390 - z/VM only)" - depends on IUCV + depends on S390 + def_tristate m if QETH_L3 || IUCV + prompt "AF_IUCV Socket support (S390 - z/VM and HiperSockets transport)" help - Select this option if you want to use inter-user communication under - VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast - communication link between VM guests. + Select this option if you want to use AF_IUCV socket applications + based on z/VM inter-user communication vehicle or based on + HiperSockets. diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index e2013e434d03..d5c5b8fd1d01 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -27,10 +27,9 @@ #include <asm/cpcmd.h> #include <linux/kmod.h> -#include <net/iucv/iucv.h> #include <net/iucv/af_iucv.h> -#define VERSION "1.1" +#define VERSION "1.2" static char iucv_userid[80]; @@ -42,6 +41,8 @@ static struct proto iucv_proto = { .obj_size = sizeof(struct iucv_sock), }; +static struct iucv_interface *pr_iucv; + /* special AF_IUCV IPRM messages */ static const u8 iprm_shutdown[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; @@ -90,6 +91,12 @@ do { \ static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); +static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); +static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, + struct sk_buff *skb, u8 flags); +static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify); + /* Call Back functions */ static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); @@ -123,6 +130,17 @@ static inline void low_nmcpy(unsigned char *dst, char *src) memcpy(&dst[8], src, 8); } +static void iucv_skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(list)) != NULL) { + if (skb->dev) + dev_put(skb->dev); + kfree_skb(skb); + } +} + static int afiucv_pm_prepare(struct device *dev) { #ifdef CONFIG_PM_DEBUG @@ -157,15 +175,14 @@ static int afiucv_pm_freeze(struct device *dev) read_lock(&iucv_sk_list.lock); sk_for_each(sk, node, &iucv_sk_list.head) { iucv = iucv_sk(sk); - skb_queue_purge(&iucv->send_skb_q); + iucv_skb_queue_purge(&iucv->send_skb_q); skb_queue_purge(&iucv->backlog_skb_q); switch (sk->sk_state) { - case IUCV_SEVERED: case IUCV_DISCONN: case IUCV_CLOSING: case IUCV_CONNECTED: if (iucv->path) { - err = iucv_path_sever(iucv->path, NULL); + err = pr_iucv->path_sever(iucv->path, NULL); iucv_path_free(iucv->path); iucv->path = NULL; } @@ -205,7 +222,6 @@ static int afiucv_pm_restore_thaw(struct device *dev) sk->sk_state_change(sk); break; case IUCV_DISCONN: - case IUCV_SEVERED: case IUCV_CLOSING: case IUCV_LISTEN: case IUCV_BOUND: @@ -229,7 +245,7 @@ static const struct dev_pm_ops afiucv_pm_ops = { static struct device_driver af_iucv_driver = { .owner = THIS_MODULE, .name = "afiucv", - .bus = &iucv_bus, + .bus = NULL, .pm = &afiucv_pm_ops, }; @@ -294,7 +310,11 @@ static inline int iucv_below_msglim(struct sock *sk) if (sk->sk_state != IUCV_CONNECTED) return 1; - return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); + if (iucv->transport == AF_IUCV_TRANS_IUCV) + return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); + else + return ((atomic_read(&iucv->msg_sent) < iucv->msglimit_peer) && + (atomic_read(&iucv->pendings) <= 0)); } /** @@ -312,23 +332,76 @@ static void iucv_sock_wake_msglim(struct sock *sk) rcu_read_unlock(); } -/* Timers */ -static void iucv_sock_timeout(unsigned long arg) -{ - struct sock *sk = (struct sock *)arg; - - bh_lock_sock(sk); - sk->sk_err = ETIMEDOUT; - sk->sk_state_change(sk); - bh_unlock_sock(sk); - - iucv_sock_kill(sk); - sock_put(sk); -} - -static void iucv_sock_clear_timer(struct sock *sk) +/** + * afiucv_hs_send() - send a message through HiperSockets transport + */ +static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, + struct sk_buff *skb, u8 flags) { - sk_stop_timer(sk, &sk->sk_timer); + struct net *net = sock_net(sock); + struct iucv_sock *iucv = iucv_sk(sock); + struct af_iucv_trans_hdr *phs_hdr; + struct sk_buff *nskb; + int err, confirm_recv = 0; + + memset(skb->head, 0, ETH_HLEN); + phs_hdr = (struct af_iucv_trans_hdr *)skb_push(skb, + sizeof(struct af_iucv_trans_hdr)); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + memset(phs_hdr, 0, sizeof(struct af_iucv_trans_hdr)); + + phs_hdr->magic = ETH_P_AF_IUCV; + phs_hdr->version = 1; + phs_hdr->flags = flags; + if (flags == AF_IUCV_FLAG_SYN) + phs_hdr->window = iucv->msglimit; + else if ((flags == AF_IUCV_FLAG_WIN) || !flags) { + confirm_recv = atomic_read(&iucv->msg_recv); + phs_hdr->window = confirm_recv; + if (confirm_recv) + phs_hdr->flags = phs_hdr->flags | AF_IUCV_FLAG_WIN; + } + memcpy(phs_hdr->destUserID, iucv->dst_user_id, 8); + memcpy(phs_hdr->destAppName, iucv->dst_name, 8); + memcpy(phs_hdr->srcUserID, iucv->src_user_id, 8); + memcpy(phs_hdr->srcAppName, iucv->src_name, 8); + ASCEBC(phs_hdr->destUserID, sizeof(phs_hdr->destUserID)); + ASCEBC(phs_hdr->destAppName, sizeof(phs_hdr->destAppName)); + ASCEBC(phs_hdr->srcUserID, sizeof(phs_hdr->srcUserID)); + ASCEBC(phs_hdr->srcAppName, sizeof(phs_hdr->srcAppName)); + if (imsg) + memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message)); + + skb->dev = dev_get_by_index(net, sock->sk_bound_dev_if); + if (!skb->dev) + return -ENODEV; + if (!(skb->dev->flags & IFF_UP)) + return -ENETDOWN; + if (skb->len > skb->dev->mtu) { + if (sock->sk_type == SOCK_SEQPACKET) + return -EMSGSIZE; + else + skb_trim(skb, skb->dev->mtu); + } + skb->protocol = ETH_P_AF_IUCV; + skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + skb_queue_tail(&iucv->send_skb_q, nskb); + err = dev_queue_xmit(skb); + if (err) { + skb_unlink(nskb, &iucv->send_skb_q); + dev_put(nskb->dev); + kfree_skb(nskb); + } else { + atomic_sub(confirm_recv, &iucv->msg_recv); + WARN_ON(atomic_read(&iucv->msg_recv) < 0); + } + return err; } static struct sock *__iucv_get_sock_by_name(char *nm) @@ -380,8 +453,9 @@ static void iucv_sock_close(struct sock *sk) unsigned char user_data[16]; struct iucv_sock *iucv = iucv_sk(sk); unsigned long timeo; + int err, blen; + struct sk_buff *skb; - iucv_sock_clear_timer(sk); lock_sock(sk); switch (sk->sk_state) { @@ -390,7 +464,19 @@ static void iucv_sock_close(struct sock *sk) break; case IUCV_CONNECTED: - case IUCV_DISCONN: + if (iucv->transport == AF_IUCV_TRANS_HIPER) { + /* send fin */ + blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + skb = sock_alloc_send_skb(sk, blen, 1, &err); + if (skb) { + skb_reserve(skb, blen); + err = afiucv_hs_send(NULL, sk, skb, + AF_IUCV_FLAG_FIN); + } + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + } + case IUCV_DISCONN: /* fall through */ sk->sk_state = IUCV_CLOSING; sk->sk_state_change(sk); @@ -412,7 +498,7 @@ static void iucv_sock_close(struct sock *sk) low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); ASCEBC(user_data, sizeof(user_data)); - iucv_path_sever(iucv->path, user_data); + pr_iucv->path_sever(iucv->path, user_data); iucv_path_free(iucv->path); iucv->path = NULL; } @@ -420,7 +506,7 @@ static void iucv_sock_close(struct sock *sk) sk->sk_err = ECONNRESET; sk->sk_state_change(sk); - skb_queue_purge(&iucv->send_skb_q); + iucv_skb_queue_purge(&iucv->send_skb_q); skb_queue_purge(&iucv->backlog_skb_q); break; @@ -444,23 +530,33 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent) static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; + struct iucv_sock *iucv; sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); if (!sk) return NULL; + iucv = iucv_sk(sk); sock_init_data(sock, sk); - INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); - spin_lock_init(&iucv_sk(sk)->accept_q_lock); - skb_queue_head_init(&iucv_sk(sk)->send_skb_q); - INIT_LIST_HEAD(&iucv_sk(sk)->message_q.list); - spin_lock_init(&iucv_sk(sk)->message_q.lock); - skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); - iucv_sk(sk)->send_tag = 0; - iucv_sk(sk)->flags = 0; - iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT; - iucv_sk(sk)->path = NULL; - memset(&iucv_sk(sk)->src_user_id , 0, 32); + INIT_LIST_HEAD(&iucv->accept_q); + spin_lock_init(&iucv->accept_q_lock); + skb_queue_head_init(&iucv->send_skb_q); + INIT_LIST_HEAD(&iucv->message_q.list); + spin_lock_init(&iucv->message_q.lock); + skb_queue_head_init(&iucv->backlog_skb_q); + iucv->send_tag = 0; + atomic_set(&iucv->pendings, 0); + iucv->flags = 0; + iucv->msglimit = 0; + atomic_set(&iucv->msg_sent, 0); + atomic_set(&iucv->msg_recv, 0); + iucv->path = NULL; + iucv->sk_txnotify = afiucv_hs_callback_txnotify; + memset(&iucv->src_user_id , 0, 32); + if (pr_iucv) + iucv->transport = AF_IUCV_TRANS_IUCV; + else + iucv->transport = AF_IUCV_TRANS_HIPER; sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; @@ -471,8 +567,6 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) sk->sk_protocol = proto; sk->sk_state = IUCV_OPEN; - setup_timer(&sk->sk_timer, iucv_sock_timeout, (unsigned long)sk); - iucv_sock_link(&iucv_sk_list, sk); return sk; } @@ -565,16 +659,12 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) } if (sk->sk_state == IUCV_CONNECTED || - sk->sk_state == IUCV_SEVERED || - sk->sk_state == IUCV_DISCONN || /* due to PM restore */ + sk->sk_state == IUCV_DISCONN || !newsock) { iucv_accept_unlink(sk); if (newsock) sock_graft(sk, newsock); - if (sk->sk_state == IUCV_SEVERED) - sk->sk_state = IUCV_DISCONN; - release_sock(sk); return sk; } @@ -591,7 +681,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; struct iucv_sock *iucv; - int err; + int err = 0; + struct net_device *dev; + char uid[9]; /* Verify the input sockaddr */ if (!addr || addr->sa_family != AF_IUCV) @@ -610,19 +702,46 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, err = -EADDRINUSE; goto done_unlock; } - if (iucv->path) { - err = 0; + if (iucv->path) goto done_unlock; - } /* Bind the socket */ - memcpy(iucv->src_name, sa->siucv_name, 8); - /* Copy the user id */ - memcpy(iucv->src_user_id, iucv_userid, 8); - sk->sk_state = IUCV_BOUND; - err = 0; + if (pr_iucv) + if (!memcmp(sa->siucv_user_id, iucv_userid, 8)) + goto vm_bind; /* VM IUCV transport */ + /* try hiper transport */ + memcpy(uid, sa->siucv_user_id, sizeof(uid)); + ASCEBC(uid, 8); + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + if (!memcmp(dev->perm_addr, uid, 8)) { + memcpy(iucv->src_name, sa->siucv_name, 8); + memcpy(iucv->src_user_id, sa->siucv_user_id, 8); + sk->sk_bound_dev_if = dev->ifindex; + sk->sk_state = IUCV_BOUND; + iucv->transport = AF_IUCV_TRANS_HIPER; + if (!iucv->msglimit) + iucv->msglimit = IUCV_HIPER_MSGLIM_DEFAULT; + rcu_read_unlock(); + goto done_unlock; + } + } + rcu_read_unlock(); +vm_bind: + if (pr_iucv) { + /* use local userid for backward compat */ + memcpy(iucv->src_name, sa->siucv_name, 8); + memcpy(iucv->src_user_id, iucv_userid, 8); + sk->sk_state = IUCV_BOUND; + iucv->transport = AF_IUCV_TRANS_IUCV; + if (!iucv->msglimit) + iucv->msglimit = IUCV_QUEUELEN_DEFAULT; + goto done_unlock; + } + /* found no dev to bind */ + err = -ENODEV; done_unlock: /* Release the socket list lock */ write_unlock_bh(&iucv_sk_list.lock); @@ -635,16 +754,13 @@ done: static int iucv_sock_autobind(struct sock *sk) { struct iucv_sock *iucv = iucv_sk(sk); - char query_buffer[80]; char name[12]; int err = 0; - /* Set the userid and name */ - cpcmd("QUERY USERID", query_buffer, sizeof(query_buffer), &err); - if (unlikely(err)) + if (unlikely(!pr_iucv)) return -EPROTO; - memcpy(iucv->src_user_id, query_buffer, 8); + memcpy(iucv->src_user_id, iucv_userid, 8); write_lock_bh(&iucv_sk_list.lock); @@ -658,45 +774,44 @@ static int iucv_sock_autobind(struct sock *sk) memcpy(&iucv->src_name, name, 8); + if (!iucv->msglimit) + iucv->msglimit = IUCV_QUEUELEN_DEFAULT; + return err; } -/* Connect an unconnected socket */ -static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, - int alen, int flags) +static int afiucv_hs_connect(struct socket *sock) { - struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; - struct iucv_sock *iucv; - unsigned char user_data[16]; - int err; - - if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) - return -EINVAL; - - if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) - return -EBADFD; - - if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET) - return -EINVAL; + struct sk_buff *skb; + int blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + int err = 0; - if (sk->sk_state == IUCV_OPEN) { - err = iucv_sock_autobind(sk); - if (unlikely(err)) - return err; + /* send syn */ + skb = sock_alloc_send_skb(sk, blen, 1, &err); + if (!skb) { + err = -ENOMEM; + goto done; } + skb->dev = NULL; + skb_reserve(skb, blen); + err = afiucv_hs_send(NULL, sk, skb, AF_IUCV_FLAG_SYN); +done: + return err; +} - lock_sock(sk); - - /* Set the destination information */ - memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8); - memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8); +static int afiucv_path_connect(struct socket *sock, struct sockaddr *addr) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + unsigned char user_data[16]; + int err; high_nmcpy(user_data, sa->siucv_name); - low_nmcpy(user_data, iucv_sk(sk)->src_name); + low_nmcpy(user_data, iucv->src_name); ASCEBC(user_data, sizeof(user_data)); - iucv = iucv_sk(sk); /* Create path. */ iucv->path = iucv_path_alloc(iucv->msglimit, IUCV_IPRMDATA, GFP_KERNEL); @@ -704,8 +819,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, err = -ENOMEM; goto done; } - err = iucv_path_connect(iucv->path, &af_iucv_handler, - sa->siucv_user_id, NULL, user_data, sk); + err = pr_iucv->path_connect(iucv->path, &af_iucv_handler, + sa->siucv_user_id, NULL, user_data, + sk); if (err) { iucv_path_free(iucv->path); iucv->path = NULL; @@ -724,21 +840,62 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, err = -ECONNREFUSED; break; } - goto done; } +done: + return err; +} - if (sk->sk_state != IUCV_CONNECTED) { +/* Connect an unconnected socket */ +static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + int err; + + if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) + return -EINVAL; + + if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) + return -EBADFD; + + if (sk->sk_state == IUCV_OPEN && + iucv->transport == AF_IUCV_TRANS_HIPER) + return -EBADFD; /* explicit bind required */ + + if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET) + return -EINVAL; + + if (sk->sk_state == IUCV_OPEN) { + err = iucv_sock_autobind(sk); + if (unlikely(err)) + return err; + } + + lock_sock(sk); + + /* Set the destination information */ + memcpy(iucv->dst_user_id, sa->siucv_user_id, 8); + memcpy(iucv->dst_name, sa->siucv_name, 8); + + if (iucv->transport == AF_IUCV_TRANS_HIPER) + err = afiucv_hs_connect(sock); + else + err = afiucv_path_connect(sock, addr); + if (err) + goto done; + + if (sk->sk_state != IUCV_CONNECTED) err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED, IUCV_DISCONN), sock_sndtimeo(sk, flags & O_NONBLOCK)); - } - if (sk->sk_state == IUCV_DISCONN) { + if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_CLOSED) err = -ECONNREFUSED; - } - if (err) { - iucv_path_sever(iucv->path, NULL); + if (err && iucv->transport == AF_IUCV_TRANS_IUCV) { + pr_iucv->path_sever(iucv->path, NULL); iucv_path_free(iucv->path); iucv->path = NULL; } @@ -833,20 +990,21 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, { struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); addr->sa_family = AF_IUCV; *len = sizeof(struct sockaddr_iucv); if (peer) { - memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8); - memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8); + memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8); + memcpy(siucv->siucv_name, iucv->dst_name, 8); } else { - memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8); - memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8); + memcpy(siucv->siucv_user_id, iucv->src_user_id, 8); + memcpy(siucv->siucv_name, iucv->src_name, 8); } memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port)); memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr)); - memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); + memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); return 0; } @@ -871,7 +1029,7 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, memcpy(prmdata, (void *) skb->data, skb->len); prmdata[7] = 0xff - (u8) skb->len; - return iucv_message_send(path, msg, IUCV_IPRMDATA, 0, + return pr_iucv->message_send(path, msg, IUCV_IPRMDATA, 0, (void *) prmdata, 8); } @@ -960,9 +1118,16 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, * this is fine for SOCK_SEQPACKET (unless we want to support * segmented records using the MSG_EOR flag), but * for SOCK_STREAM we might want to improve it in future */ - skb = sock_alloc_send_skb(sk, len, noblock, &err); + if (iucv->transport == AF_IUCV_TRANS_HIPER) + skb = sock_alloc_send_skb(sk, + len + sizeof(struct af_iucv_trans_hdr) + ETH_HLEN, + noblock, &err); + else + skb = sock_alloc_send_skb(sk, len, noblock, &err); if (!skb) goto out; + if (iucv->transport == AF_IUCV_TRANS_HIPER) + skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN); if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { err = -EFAULT; goto fail; @@ -983,6 +1148,15 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + if (iucv->transport == AF_IUCV_TRANS_HIPER) { + atomic_inc(&iucv->msg_sent); + err = afiucv_hs_send(&txmsg, sk, skb, 0); + if (err) { + atomic_dec(&iucv->msg_sent); + goto fail; + } + goto release; + } skb_queue_tail(&iucv->send_skb_q, skb); if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) @@ -999,13 +1173,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* this error should never happen since the * IUCV_IPRMDATA path flag is set... sever path */ if (err == 0x15) { - iucv_path_sever(iucv->path, NULL); + pr_iucv->path_sever(iucv->path, NULL); skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; } } else - err = iucv_message_send(iucv->path, &txmsg, 0, 0, + err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0, (void *) skb->data, skb->len); if (err) { if (err == 3) { @@ -1023,10 +1197,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto fail; } +release: release_sock(sk); return len; fail: + if (skb->dev) + dev_put(skb->dev); kfree_skb(skb); out: release_sock(sk); @@ -1095,8 +1272,9 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, skb->len = 0; } } else { - rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA, - skb->data, len, NULL); + rc = pr_iucv->message_receive(path, msg, + msg->flags & IUCV_IPRMDATA, + skb->data, len, NULL); if (rc) { kfree_skb(skb); return; @@ -1110,7 +1288,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, kfree_skb(skb); skb = NULL; if (rc) { - iucv_path_sever(path, NULL); + pr_iucv->path_sever(path, NULL); return; } skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); @@ -1154,10 +1332,11 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned int copied, rlen; - struct sk_buff *skb, *rskb, *cskb; + struct sk_buff *skb, *rskb, *cskb, *sskb; + int blen; int err = 0; - if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) && + if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && skb_queue_empty(&sk->sk_receive_queue) && list_empty(&iucv->message_q.list)) @@ -1179,7 +1358,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied = min_t(unsigned int, rlen, len); cskb = skb; - if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { + if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; @@ -1217,6 +1396,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } kfree_skb(skb); + atomic_inc(&iucv->msg_recv); /* Queue backlog skbs */ spin_lock_bh(&iucv->message_q.lock); @@ -1233,6 +1413,22 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb_queue_empty(&iucv->backlog_skb_q)) { if (!list_empty(&iucv->message_q.list)) iucv_process_message_q(sk); + if (atomic_read(&iucv->msg_recv) >= + iucv->msglimit / 2) { + /* send WIN to peer */ + blen = sizeof(struct af_iucv_trans_hdr) + + ETH_HLEN; + sskb = sock_alloc_send_skb(sk, blen, 1, &err); + if (sskb) { + skb_reserve(sskb, blen); + err = afiucv_hs_send(NULL, sk, sskb, + AF_IUCV_FLAG_WIN); + } + if (err) { + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + } + } } spin_unlock_bh(&iucv->message_q.lock); } @@ -1287,7 +1483,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == IUCV_CLOSED) mask |= POLLHUP; - if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) + if (sk->sk_state == IUCV_DISCONN) mask |= POLLIN; if (sock_writeable(sk)) @@ -1314,7 +1510,6 @@ static int iucv_sock_shutdown(struct socket *sock, int how) switch (sk->sk_state) { case IUCV_DISCONN: case IUCV_CLOSING: - case IUCV_SEVERED: case IUCV_CLOSED: err = -ENOTCONN; goto fail; @@ -1327,8 +1522,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how) if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { txmsg.class = 0; txmsg.tag = 0; - err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, - (void *) iprm_shutdown, 8); + err = pr_iucv->message_send(iucv->path, &txmsg, IUCV_IPRMDATA, + 0, (void *) iprm_shutdown, 8); if (err) { switch (err) { case 1: @@ -1345,7 +1540,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how) } if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) { - err = iucv_path_quiesce(iucv_sk(sk)->path, NULL); + err = pr_iucv->path_quiesce(iucv->path, NULL); if (err) err = -ENOTCONN; @@ -1372,7 +1567,7 @@ static int iucv_sock_release(struct socket *sock) /* Unregister with IUCV base support */ if (iucv_sk(sk)->path) { - iucv_path_sever(iucv_sk(sk)->path, NULL); + pr_iucv->path_sever(iucv_sk(sk)->path, NULL); iucv_path_free(iucv_sk(sk)->path); iucv_sk(sk)->path = NULL; } @@ -1514,14 +1709,14 @@ static int iucv_callback_connreq(struct iucv_path *path, high_nmcpy(user_data, iucv->dst_name); ASCEBC(user_data, sizeof(user_data)); if (sk->sk_state != IUCV_LISTEN) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); goto fail; } /* Check for backlog size */ if (sk_acceptq_is_full(sk)) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); goto fail; } @@ -1529,7 +1724,7 @@ static int iucv_callback_connreq(struct iucv_path *path, /* Create the new socket */ nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); if (!nsk) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); goto fail; } @@ -1553,9 +1748,9 @@ static int iucv_callback_connreq(struct iucv_path *path, /* set message limit for path based on msglimit of accepting socket */ niucv->msglimit = iucv->msglimit; path->msglim = iucv->msglimit; - err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); + err = pr_iucv->path_accept(path, &af_iucv_handler, nuser_data, nsk); if (err) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); iucv_sock_kill(nsk); goto fail; @@ -1589,7 +1784,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) int len; if (sk->sk_shutdown & RCV_SHUTDOWN) { - iucv_message_reject(path, msg); + pr_iucv->message_reject(path, msg); return; } @@ -1600,7 +1795,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) goto save_message; len = atomic_read(&sk->sk_rmem_alloc); - len += iucv_msg_length(msg) + sizeof(struct sk_buff); + len += SKB_TRUESIZE(iucv_msg_length(msg)); if (len > sk->sk_rcvbuf) goto save_message; @@ -1669,10 +1864,7 @@ static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16]) { struct sock *sk = path->private; - if (!list_empty(&iucv_sk(sk)->accept_q)) - sk->sk_state = IUCV_SEVERED; - else - sk->sk_state = IUCV_DISCONN; + sk->sk_state = IUCV_DISCONN; sk->sk_state_change(sk); } @@ -1692,6 +1884,395 @@ static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16]) bh_unlock_sock(sk); } +/***************** HiperSockets transport callbacks ********************/ +static void afiucv_swap_src_dest(struct sk_buff *skb) +{ + struct af_iucv_trans_hdr *trans_hdr = + (struct af_iucv_trans_hdr *)skb->data; + char tmpID[8]; + char tmpName[8]; + + ASCEBC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID)); + ASCEBC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName)); + ASCEBC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID)); + ASCEBC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName)); + memcpy(tmpID, trans_hdr->srcUserID, 8); + memcpy(tmpName, trans_hdr->srcAppName, 8); + memcpy(trans_hdr->srcUserID, trans_hdr->destUserID, 8); + memcpy(trans_hdr->srcAppName, trans_hdr->destAppName, 8); + memcpy(trans_hdr->destUserID, tmpID, 8); + memcpy(trans_hdr->destAppName, tmpName, 8); + skb_push(skb, ETH_HLEN); + memset(skb->data, 0, ETH_HLEN); +} + +/** + * afiucv_hs_callback_syn - react on received SYN + **/ +static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) +{ + struct sock *nsk; + struct iucv_sock *iucv, *niucv; + struct af_iucv_trans_hdr *trans_hdr; + int err; + + iucv = iucv_sk(sk); + trans_hdr = (struct af_iucv_trans_hdr *)skb->data; + if (!iucv) { + /* no sock - connection refused */ + afiucv_swap_src_dest(skb); + trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN; + err = dev_queue_xmit(skb); + goto out; + } + + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + bh_lock_sock(sk); + if ((sk->sk_state != IUCV_LISTEN) || + sk_acceptq_is_full(sk) || + !nsk) { + /* error on server socket - connection refused */ + if (nsk) + sk_free(nsk); + afiucv_swap_src_dest(skb); + trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN; + err = dev_queue_xmit(skb); + bh_unlock_sock(sk); + goto out; + } + + niucv = iucv_sk(nsk); + iucv_sock_init(nsk, sk); + niucv->transport = AF_IUCV_TRANS_HIPER; + niucv->msglimit = iucv->msglimit; + if (!trans_hdr->window) + niucv->msglimit_peer = IUCV_HIPER_MSGLIM_DEFAULT; + else + niucv->msglimit_peer = trans_hdr->window; + memcpy(niucv->dst_name, trans_hdr->srcAppName, 8); + memcpy(niucv->dst_user_id, trans_hdr->srcUserID, 8); + memcpy(niucv->src_name, iucv->src_name, 8); + memcpy(niucv->src_user_id, iucv->src_user_id, 8); + nsk->sk_bound_dev_if = sk->sk_bound_dev_if; + afiucv_swap_src_dest(skb); + trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK; + trans_hdr->window = niucv->msglimit; + /* if receiver acks the xmit connection is established */ + err = dev_queue_xmit(skb); + if (!err) { + iucv_accept_enqueue(sk, nsk); + nsk->sk_state = IUCV_CONNECTED; + sk->sk_data_ready(sk, 1); + } else + iucv_sock_kill(nsk); + bh_unlock_sock(sk); + +out: + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_synack() - react on received SYN-ACK + **/ +static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + struct af_iucv_trans_hdr *trans_hdr = + (struct af_iucv_trans_hdr *)skb->data; + + if (!iucv) + goto out; + if (sk->sk_state != IUCV_BOUND) + goto out; + bh_lock_sock(sk); + iucv->msglimit_peer = trans_hdr->window; + sk->sk_state = IUCV_CONNECTED; + sk->sk_state_change(sk); + bh_unlock_sock(sk); +out: + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_synfin() - react on received SYN_FIN + **/ +static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (!iucv) + goto out; + if (sk->sk_state != IUCV_BOUND) + goto out; + bh_lock_sock(sk); + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + bh_unlock_sock(sk); +out: + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_fin() - react on received FIN + **/ +static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + /* other end of connection closed */ + if (iucv) { + bh_lock_sock(sk); + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + bh_unlock_sock(sk); + } + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_win() - react on received WIN + **/ +static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + struct af_iucv_trans_hdr *trans_hdr = + (struct af_iucv_trans_hdr *)skb->data; + + if (!iucv) + return NET_RX_SUCCESS; + + if (sk->sk_state != IUCV_CONNECTED) + return NET_RX_SUCCESS; + + atomic_sub(trans_hdr->window, &iucv->msg_sent); + iucv_sock_wake_msglim(sk); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_rx() - react on received data + **/ +static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (!iucv) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + if (sk->sk_state != IUCV_CONNECTED) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + /* write stuff from iucv_msg to skb cb */ + if (skb->len <= sizeof(struct af_iucv_trans_hdr)) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + spin_lock(&iucv->message_q.lock); + if (skb_queue_empty(&iucv->backlog_skb_q)) { + if (sock_queue_rcv_skb(sk, skb)) { + /* handle rcv queue full */ + skb_queue_tail(&iucv->backlog_skb_q, skb); + } + } else + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); + spin_unlock(&iucv->message_q.lock); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_rcv() - base function for arriving data through HiperSockets + * transport + * called from netif RX softirq + **/ +static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct hlist_node *node; + struct sock *sk; + struct iucv_sock *iucv; + struct af_iucv_trans_hdr *trans_hdr; + char nullstring[8]; + int err = 0; + + skb_pull(skb, ETH_HLEN); + trans_hdr = (struct af_iucv_trans_hdr *)skb->data; + EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName)); + EBCASC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID)); + EBCASC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName)); + EBCASC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID)); + memset(nullstring, 0, sizeof(nullstring)); + iucv = NULL; + sk = NULL; + read_lock(&iucv_sk_list.lock); + sk_for_each(sk, node, &iucv_sk_list.head) { + if (trans_hdr->flags == AF_IUCV_FLAG_SYN) { + if ((!memcmp(&iucv_sk(sk)->src_name, + trans_hdr->destAppName, 8)) && + (!memcmp(&iucv_sk(sk)->src_user_id, + trans_hdr->destUserID, 8)) && + (!memcmp(&iucv_sk(sk)->dst_name, nullstring, 8)) && + (!memcmp(&iucv_sk(sk)->dst_user_id, + nullstring, 8))) { + iucv = iucv_sk(sk); + break; + } + } else { + if ((!memcmp(&iucv_sk(sk)->src_name, + trans_hdr->destAppName, 8)) && + (!memcmp(&iucv_sk(sk)->src_user_id, + trans_hdr->destUserID, 8)) && + (!memcmp(&iucv_sk(sk)->dst_name, + trans_hdr->srcAppName, 8)) && + (!memcmp(&iucv_sk(sk)->dst_user_id, + trans_hdr->srcUserID, 8))) { + iucv = iucv_sk(sk); + break; + } + } + } + read_unlock(&iucv_sk_list.lock); + if (!iucv) + sk = NULL; + + /* no sock + how should we send with no sock + 1) send without sock no send rc checking? + 2) introduce default sock to handle this cases + + SYN -> send SYN|ACK in good case, send SYN|FIN in bad case + data -> send FIN + SYN|ACK, SYN|FIN, FIN -> no action? */ + + switch (trans_hdr->flags) { + case AF_IUCV_FLAG_SYN: + /* connect request */ + err = afiucv_hs_callback_syn(sk, skb); + break; + case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK): + /* connect request confirmed */ + err = afiucv_hs_callback_synack(sk, skb); + break; + case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN): + /* connect request refused */ + err = afiucv_hs_callback_synfin(sk, skb); + break; + case (AF_IUCV_FLAG_FIN): + /* close request */ + err = afiucv_hs_callback_fin(sk, skb); + break; + case (AF_IUCV_FLAG_WIN): + err = afiucv_hs_callback_win(sk, skb); + if (skb->len > sizeof(struct af_iucv_trans_hdr)) + err = afiucv_hs_callback_rx(sk, skb); + else + kfree(skb); + break; + case 0: + /* plain data frame */ + memcpy(CB_TRGCLS(skb), &trans_hdr->iucv_hdr.class, + CB_TRGCLS_LEN); + err = afiucv_hs_callback_rx(sk, skb); + break; + default: + ; + } + + return err; +} + +/** + * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets + * transport + **/ +static void afiucv_hs_callback_txnotify(struct sk_buff *skb, + enum iucv_tx_notify n) +{ + struct sock *isk = skb->sk; + struct sock *sk = NULL; + struct iucv_sock *iucv = NULL; + struct sk_buff_head *list; + struct sk_buff *list_skb; + struct sk_buff *this = NULL; + unsigned long flags; + struct hlist_node *node; + + read_lock(&iucv_sk_list.lock); + sk_for_each(sk, node, &iucv_sk_list.head) + if (sk == isk) { + iucv = iucv_sk(sk); + break; + } + read_unlock(&iucv_sk_list.lock); + + if (!iucv) + return; + + bh_lock_sock(sk); + list = &iucv->send_skb_q; + list_skb = list->next; + if (skb_queue_empty(list)) + goto out_unlock; + + spin_lock_irqsave(&list->lock, flags); + while (list_skb != (struct sk_buff *)list) { + if (skb_shinfo(list_skb) == skb_shinfo(skb)) { + this = list_skb; + switch (n) { + case TX_NOTIFY_OK: + __skb_unlink(this, list); + iucv_sock_wake_msglim(sk); + dev_put(this->dev); + kfree_skb(this); + break; + case TX_NOTIFY_PENDING: + atomic_inc(&iucv->pendings); + break; + case TX_NOTIFY_DELAYED_OK: + __skb_unlink(this, list); + atomic_dec(&iucv->pendings); + if (atomic_read(&iucv->pendings) <= 0) + iucv_sock_wake_msglim(sk); + dev_put(this->dev); + kfree_skb(this); + break; + case TX_NOTIFY_UNREACHABLE: + case TX_NOTIFY_DELAYED_UNREACHABLE: + case TX_NOTIFY_TPQFULL: /* not yet used */ + case TX_NOTIFY_GENERALERROR: + case TX_NOTIFY_DELAYED_GENERALERROR: + __skb_unlink(this, list); + dev_put(this->dev); + kfree_skb(this); + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + break; + } + break; + } + list_skb = list_skb->next; + } + spin_unlock_irqrestore(&list->lock, flags); + + if (sk->sk_state == IUCV_CLOSING) { + if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { + sk->sk_state = IUCV_CLOSED; + sk->sk_state_change(sk); + } + } + +out_unlock: + bh_unlock_sock(sk); +} static const struct proto_ops iucv_sock_ops = { .family = PF_IUCV, .owner = THIS_MODULE, @@ -1718,71 +2299,104 @@ static const struct net_proto_family iucv_sock_family_ops = { .create = iucv_sock_create, }; -static int __init afiucv_init(void) +static struct packet_type iucv_packet_type = { + .type = cpu_to_be16(ETH_P_AF_IUCV), + .func = afiucv_hs_rcv, +}; + +static int afiucv_iucv_init(void) { int err; - if (!MACHINE_IS_VM) { - pr_err("The af_iucv module cannot be loaded" - " without z/VM\n"); - err = -EPROTONOSUPPORT; - goto out; - } - cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); - if (unlikely(err)) { - WARN_ON(err); - err = -EPROTONOSUPPORT; - goto out; - } - - err = iucv_register(&af_iucv_handler, 0); + err = pr_iucv->iucv_register(&af_iucv_handler, 0); if (err) goto out; - err = proto_register(&iucv_proto, 0); - if (err) - goto out_iucv; - err = sock_register(&iucv_sock_family_ops); - if (err) - goto out_proto; /* establish dummy device */ + af_iucv_driver.bus = pr_iucv->bus; err = driver_register(&af_iucv_driver); if (err) - goto out_sock; + goto out_iucv; af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL); if (!af_iucv_dev) { err = -ENOMEM; goto out_driver; } dev_set_name(af_iucv_dev, "af_iucv"); - af_iucv_dev->bus = &iucv_bus; - af_iucv_dev->parent = iucv_root; + af_iucv_dev->bus = pr_iucv->bus; + af_iucv_dev->parent = pr_iucv->root; af_iucv_dev->release = (void (*)(struct device *))kfree; af_iucv_dev->driver = &af_iucv_driver; err = device_register(af_iucv_dev); if (err) goto out_driver; - return 0; out_driver: driver_unregister(&af_iucv_driver); +out_iucv: + pr_iucv->iucv_unregister(&af_iucv_handler, 0); +out: + return err; +} + +static int __init afiucv_init(void) +{ + int err; + + if (MACHINE_IS_VM) { + cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); + if (unlikely(err)) { + WARN_ON(err); + err = -EPROTONOSUPPORT; + goto out; + } + + pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv"); + if (!pr_iucv) { + printk(KERN_WARNING "iucv_if lookup failed\n"); + memset(&iucv_userid, 0, sizeof(iucv_userid)); + } + } else { + memset(&iucv_userid, 0, sizeof(iucv_userid)); + pr_iucv = NULL; + } + + err = proto_register(&iucv_proto, 0); + if (err) + goto out; + err = sock_register(&iucv_sock_family_ops); + if (err) + goto out_proto; + + if (pr_iucv) { + err = afiucv_iucv_init(); + if (err) + goto out_sock; + } + dev_add_pack(&iucv_packet_type); + return 0; + out_sock: sock_unregister(PF_IUCV); out_proto: proto_unregister(&iucv_proto); -out_iucv: - iucv_unregister(&af_iucv_handler, 0); out: + if (pr_iucv) + symbol_put(iucv_if); return err; } static void __exit afiucv_exit(void) { - device_unregister(af_iucv_dev); - driver_unregister(&af_iucv_driver); + if (pr_iucv) { + device_unregister(af_iucv_dev); + driver_unregister(&af_iucv_driver); + pr_iucv->iucv_unregister(&af_iucv_handler, 0); + symbol_put(iucv_if); + } + dev_remove_pack(&iucv_packet_type); sock_unregister(PF_IUCV); proto_unregister(&iucv_proto); - iucv_unregister(&af_iucv_handler, 0); } module_init(afiucv_init); @@ -1793,3 +2407,4 @@ MODULE_DESCRIPTION("IUCV Sockets ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_IUCV); + diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 075a3808aa40..403be43b793d 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1974,6 +1974,27 @@ out: return rc; } +struct iucv_interface iucv_if = { + .message_receive = iucv_message_receive, + .__message_receive = __iucv_message_receive, + .message_reply = iucv_message_reply, + .message_reject = iucv_message_reject, + .message_send = iucv_message_send, + .__message_send = __iucv_message_send, + .message_send2way = iucv_message_send2way, + .message_purge = iucv_message_purge, + .path_accept = iucv_path_accept, + .path_connect = iucv_path_connect, + .path_quiesce = iucv_path_quiesce, + .path_resume = iucv_path_resume, + .path_sever = iucv_path_sever, + .iucv_register = iucv_register, + .iucv_unregister = iucv_unregister, + .bus = NULL, + .root = NULL, +}; +EXPORT_SYMBOL(iucv_if); + /** * iucv_init * @@ -2038,6 +2059,8 @@ static int __init iucv_init(void) rc = bus_register(&iucv_bus); if (rc) goto out_reboot; + iucv_if.root = iucv_root; + iucv_if.bus = &iucv_bus; return 0; out_reboot: diff --git a/net/key/af_key.c b/net/key/af_key.c index 1e733e9073d0..11dbb2255ccb 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -375,7 +375,7 @@ static int verify_address_len(const void *p) const struct sadb_address *sp = p; const struct sockaddr *addr = (const struct sockaddr *)(sp + 1); const struct sockaddr_in *sin; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) const struct sockaddr_in6 *sin6; #endif int len; @@ -387,7 +387,7 @@ static int verify_address_len(const void *p) sp->sadb_address_prefixlen > 32) return -EINVAL; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t)); if (sp->sadb_address_len != len || @@ -469,7 +469,7 @@ static int present_and_same_family(const struct sadb_address *src, if (s_addr->sa_family != d_addr->sa_family) return 0; if (s_addr->sa_family != AF_INET -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) && s_addr->sa_family != AF_INET6 #endif ) @@ -579,7 +579,7 @@ static inline int pfkey_sockaddr_len(sa_family_t family) switch (family) { case AF_INET: return sizeof(struct sockaddr_in); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: return sizeof(struct sockaddr_in6); #endif @@ -595,7 +595,7 @@ int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr) xaddr->a4 = ((struct sockaddr_in *)sa)->sin_addr.s_addr; return AF_INET; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: memcpy(xaddr->a6, &((struct sockaddr_in6 *)sa)->sin6_addr, @@ -639,7 +639,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct case AF_INET: xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr; break; @@ -705,14 +705,14 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); return 32; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; sin6->sin6_family = AF_INET6; sin6->sin6_port = port; sin6->sin6_flowinfo = 0; - ipv6_addr_copy(&sin6->sin6_addr, (const struct in6_addr *)xaddr->a6); + sin6->sin6_addr = *(struct in6_addr *)xaddr->a6; sin6->sin6_scope_id = 0; return 128; } @@ -1311,7 +1311,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr; xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr; xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr; @@ -3146,7 +3146,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, return NULL; } break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (opt != IPV6_IPSEC_POLICY) { *dir = -EOPNOTSUPP; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 34b2ddeacb67..89ff8c67943e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -397,6 +397,7 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) * expect to send up next, dequeue it and any other * in-sequence packets behind it. */ +start: spin_lock_bh(&session->reorder_q.lock); skb_queue_walk_safe(&session->reorder_q, skb, tmp) { if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { @@ -433,7 +434,7 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) */ spin_unlock_bh(&session->reorder_q.lock); l2tp_recv_dequeue_skb(session, skb); - spin_lock_bh(&session->reorder_q.lock); + goto start; } out: @@ -755,9 +756,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, goto error; } - /* Point to L2TP header */ - optr = ptr = skb->data; - /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); @@ -768,12 +766,15 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, offset = 0; do { - printk(" %02X", ptr[offset]); + printk(" %02X", skb->data[offset]); } while (++offset < length); printk("\n"); } + /* Point to L2TP header */ + optr = ptr = skb->data; + /* Get L2TP header flags */ hdrflags = ntohs(*(__be16 *) ptr); @@ -1071,7 +1072,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Get routing info from the tunnel socket */ skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(__sk_dst_get(sk))); + skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); inet = inet_sk(sk); fl = &inet->cork.fl; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f42cd0915966..8a90d756c904 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -395,6 +395,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) struct pppol2tp_session *ps; int old_headroom; int new_headroom; + int uhlen, headroom; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; @@ -413,7 +414,13 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) goto abort_put_sess; old_headroom = skb_headroom(skb); - if (skb_cow_head(skb, sizeof(ppph))) + uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; + headroom = NET_SKB_PAD + + sizeof(struct iphdr) + /* IP header */ + uhlen + /* UDP header (if L2TP_ENCAPTYPE_UDP) */ + session->hdr_len + /* L2TP header */ + sizeof(ppph); /* PPP header */ + if (skb_cow_head(skb, headroom)) goto abort_put_sess_tun; new_headroom = skb_headroom(skb); diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 956b7e47dc52..8d0324bac01c 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -139,7 +139,8 @@ out: return lapb; } -int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks) +int lapb_register(struct net_device *dev, + const struct lapb_register_struct *callbacks) { struct lapb_cb *lapb; int rc = LAPB_BADTOKEN; @@ -158,7 +159,7 @@ int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks goto out; lapb->dev = dev; - lapb->callbacks = *callbacks; + lapb->callbacks = callbacks; __lapb_insert_cb(lapb); @@ -380,32 +381,32 @@ int lapb_data_received(struct net_device *dev, struct sk_buff *skb) void lapb_connect_confirmation(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.connect_confirmation) - lapb->callbacks.connect_confirmation(lapb->dev, reason); + if (lapb->callbacks->connect_confirmation) + lapb->callbacks->connect_confirmation(lapb->dev, reason); } void lapb_connect_indication(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.connect_indication) - lapb->callbacks.connect_indication(lapb->dev, reason); + if (lapb->callbacks->connect_indication) + lapb->callbacks->connect_indication(lapb->dev, reason); } void lapb_disconnect_confirmation(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.disconnect_confirmation) - lapb->callbacks.disconnect_confirmation(lapb->dev, reason); + if (lapb->callbacks->disconnect_confirmation) + lapb->callbacks->disconnect_confirmation(lapb->dev, reason); } void lapb_disconnect_indication(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.disconnect_indication) - lapb->callbacks.disconnect_indication(lapb->dev, reason); + if (lapb->callbacks->disconnect_indication) + lapb->callbacks->disconnect_indication(lapb->dev, reason); } int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb) { - if (lapb->callbacks.data_indication) - return lapb->callbacks.data_indication(lapb->dev, skb); + if (lapb->callbacks->data_indication) + return lapb->callbacks->data_indication(lapb->dev, skb); kfree_skb(skb); return NET_RX_SUCCESS; /* For now; must be != NET_RX_DROP */ @@ -415,8 +416,8 @@ int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb) { int used = 0; - if (lapb->callbacks.data_transmit) { - lapb->callbacks.data_transmit(lapb->dev, skb); + if (lapb->callbacks->data_transmit) { + lapb->callbacks->data_transmit(lapb->dev, skb); used = 1; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index dfd3a648a551..a18e6c3d36e3 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -833,15 +833,15 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, copied += used; len -= used; + /* For non stream protcols we get one packet per recvmsg call */ + if (sk->sk_type != SOCK_STREAM) + goto copy_uaddr; + if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, 0); *seq = 0; } - /* For non stream protcols we get one packet per recvmsg call */ - if (sk->sk_type != SOCK_STREAM) - goto copy_uaddr; - /* Partial read */ if (used + offset < skb->len) continue; @@ -857,6 +857,12 @@ copy_uaddr: } if (llc_sk(sk)->cmsg_flags) llc_cmsg_rcv(msg, skb); + + if (!(flags & MSG_PEEK)) { + sk_eat_skb(sk, skb, 0); + *seq = 0; + } + goto out; } diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 903242111317..e32cab44ea95 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -13,6 +13,7 @@ */ #include <linux/netdevice.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index b38a1079a98e..b658cba89fdd 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -18,6 +18,7 @@ #include <linux/netdevice.h> #include <linux/trdevice.h> #include <linux/skbuff.h> +#include <linux/export.h> #include <net/llc.h> #include <net/llc_pdu.h> diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 7af1ff2d1f19..a1839c004357 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -17,6 +17,7 @@ #include <linux/proc_fs.h> #include <linux/errno.h> #include <linux/seq_file.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/llc.h> diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index f5fdfcbf552a..96ddb72760b9 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -199,6 +199,19 @@ config MAC80211_VERBOSE_MPL_DEBUG Do not select this option. +config MAC80211_VERBOSE_MPATH_DEBUG + bool "Verbose mesh path debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very + verbose mesh path selection debugging messages (when mac80211 + is taking part in a mesh network). + It should not be selected on production systems as those + messages are remotely triggerable. + + Do not select this option. + config MAC80211_VERBOSE_MHWMP_DEBUG bool "Verbose mesh HWMP routing debugging" depends on MAC80211_DEBUG_MENU @@ -212,6 +225,18 @@ config MAC80211_VERBOSE_MHWMP_DEBUG Do not select this option. +config MAC80211_VERBOSE_TDLS_DEBUG + bool "Verbose TDLS debugging" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out very + verbose TDLS selection debugging messages (when mac80211 + is a TDLS STA). + It should not be selected on production systems as those + messages are remotely triggerable. + + Do not select this option. + config MAC80211_DEBUG_COUNTERS bool "Extra statistics for TX/RX debugging" depends on MAC80211_DEBUG_MENU @@ -222,15 +247,3 @@ config MAC80211_DEBUG_COUNTERS and show them in debugfs. If unsure, say N. - -config MAC80211_DRIVER_API_TRACER - bool "Driver API tracer" - depends on MAC80211_DEBUG_MENU - depends on EVENT_TRACING - help - Say Y here to make mac80211 register with the ftrace - framework for the driver API -- you can then see which - driver methods it is calling and which API functions - drivers are calling by looking at the trace. - - If unsure, say Y. diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index fdb54e61d637..d540c3b160f3 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -24,7 +24,8 @@ mac80211-y := \ util.o \ wme.o \ event.o \ - chan.o + chan.o \ + driver-trace.o mac80211-$(CONFIG_MAC80211_LEDS) += led.o mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ @@ -41,7 +42,6 @@ mac80211-$(CONFIG_MAC80211_MESH) += \ mac80211-$(CONFIG_PM) += pm.o -mac80211-$(CONFIG_MAC80211_DRIVER_API_TRACER) += driver-trace.o CFLAGS_driver-trace.o := -I$(src) # objects for PID algorithm diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index fd1aaf2a4a6c..1068f668ac4e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -38,6 +38,7 @@ #include <linux/ieee80211.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -69,11 +70,14 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, if (!tid_rx) return; - rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL); + RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", - sta->sta.addr, tid); + printk(KERN_DEBUG + "Rx BA session stop requested for %pM tid %u %s reason: %d\n", + sta->sta.addr, tid, + initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", + (int)reason); #endif /* CONFIG_MAC80211_HT_DEBUG */ if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, @@ -84,7 +88,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) ieee80211_send_delba(sta->sdata, sta->sta.addr, - tid, 0, reason); + tid, WLAN_BACK_RECIPIENT, reason); del_timer_sync(&tid_rx->session_timer); del_timer_sync(&tid_rx->reorder_timer); @@ -108,7 +112,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap, int i; rcu_read_lock(); - sta = sta_info_get(sdata, addr); + sta = sta_info_get_bss(sdata, addr); if (!sta) { rcu_read_unlock(); return; @@ -167,12 +171,8 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d u16 capab; skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); - - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer " - "for addba resp frame\n", sdata->name); + if (!skb) return; - } skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); @@ -180,10 +180,13 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT) memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); @@ -227,7 +230,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) { + if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Suspend in progress. " "Denying ADDBA request\n"); @@ -279,14 +282,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* prepare A-MPDU MLME for Rx aggregation */ tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL); - if (!tid_agg_rx) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "allocate rx mlme to tid %d failed\n", - tid); -#endif + if (!tid_agg_rx) goto end; - } spin_lock_init(&tid_agg_rx->reorder_lock); @@ -306,11 +303,6 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, tid_agg_rx->reorder_time = kcalloc(buf_size, sizeof(unsigned long), GFP_KERNEL); if (!tid_agg_rx->reorder_buf || !tid_agg_rx->reorder_time) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "can not allocate reordering buffer " - "to tid %d\n", tid); -#endif kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); kfree(tid_agg_rx); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c8be8eff70da..76be61744198 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -15,6 +15,7 @@ #include <linux/ieee80211.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -54,6 +55,8 @@ * @ampdu_action function will be called with the action * %IEEE80211_AMPDU_TX_STOP. In this case, the call must not fail, * and the driver must later call ieee80211_stop_tx_ba_cb_irqsafe(). + * Note that the sta can get destroyed before the BA tear down is + * complete. */ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, @@ -68,21 +71,22 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer " - "for addba request frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT) memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); @@ -103,22 +107,21 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, tid); } -void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn) +void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) { + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_bar *bar; u16 bar_control = 0; skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer for " - "bar frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar)); memset(bar, 0, sizeof(*bar)); @@ -128,13 +131,14 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 memcpy(bar->ta, sdata->vif.addr, ETH_ALEN); bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; - bar_control |= (u16)(tid << 12); + bar_control |= (u16)(tid << IEEE80211_BAR_CTRL_TID_INFO_SHIFT); bar->control = cpu_to_le16(bar_control); bar->start_seq_num = cpu_to_le16(ssn); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, tid); } +EXPORT_SYMBOL(ieee80211_send_bar); void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx) @@ -162,6 +166,12 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return -ENOENT; } + /* if we're already stopping ignore any new requests to stop */ + if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { + spin_unlock_bh(&sta->lock); + return -EALREADY; + } + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { /* not even started yet! */ ieee80211_assign_tid_tx(sta, tid, NULL); @@ -170,6 +180,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return 0; } + set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + spin_unlock_bh(&sta->lock); #ifdef CONFIG_MAC80211_HT_DEBUG @@ -177,9 +189,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); - del_timer_sync(&tid_tx->addba_resp_timer); + del_timer_sync(&tid_tx->session_timer); /* * After this packets are no longer handed right through @@ -188,6 +199,20 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, */ clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); + /* + * There might be a few packets being processed right now (on + * another CPU) that have already gotten past the aggregation + * check when it was still OPERATIONAL and consequently have + * IEEE80211_TX_CTL_AMPDU set. In that case, this code might + * call into the driver at the same time or even before the + * TX paths calls into it, which could confuse the driver. + * + * Wait for all currently running TX paths to finish before + * telling the driver. New packets will not go through since + * the aggregation session is no longer OPERATIONAL. + */ + synchronize_net(); + tid_tx->stop_initiator = initiator; tid_tx->tx_stop = tx; @@ -284,6 +309,38 @@ ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid) __release(agg_queue); } +/* + * splice packets from the STA's pending to the local pending, + * requires a call to ieee80211_agg_splice_finish later + */ +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_local *local, + struct tid_ampdu_tx *tid_tx, u16 tid) +{ + int queue = ieee80211_ac_from_tid(tid); + unsigned long flags; + + ieee80211_stop_queue_agg(local, tid); + + if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" + " from the pending queue\n", tid)) + return; + + if (!skb_queue_empty(&tid_tx->pending)) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + /* copy over remaining packets */ + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } +} + +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) +{ + ieee80211_wake_queue_agg(local, tid); +} + void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -295,19 +352,17 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* - * While we're asking the driver about the aggregation, - * stop the AC queue so that we don't have to worry - * about frames that came in while we were doing that, - * which would require us to put them to the AC pending - * afterwards which just makes the code more complex. + * Start queuing up packets for this aggregation session. + * We're going to release them once the driver is OK with + * that. */ - ieee80211_stop_queue_agg(local, tid); - clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); /* - * make sure no packets are being processed to get - * valid starting sequence number + * Make sure no packets are being processed. This ensures that + * we have a valid starting sequence number and that in-flight + * packets have been flushed out and no packets for this TID + * will go into the driver during the ampdu_action call. */ synchronize_net(); @@ -321,17 +376,15 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) " tid %d\n", tid); #endif spin_lock_bh(&sta->lock); + ieee80211_agg_splice_packets(local, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); + ieee80211_agg_splice_finish(local, tid); spin_unlock_bh(&sta->lock); - ieee80211_wake_queue_agg(local, tid); kfree_rcu(tid_tx, rcu_head); return; } - /* we can take packets again now */ - ieee80211_wake_queue_agg(local, tid); - /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); #ifdef CONFIG_MAC80211_HT_DEBUG @@ -339,6 +392,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) #endif spin_lock_bh(&sta->lock); + sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; sta->ampdu_mlme.addba_req_num[tid]++; spin_unlock_bh(&sta->lock); @@ -349,6 +403,28 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) tid_tx->timeout); } +/* + * After accepting the AddBA Response we activated a timer, + * resetting it after each frame that we send. + */ +static void sta_tx_agg_session_timer_expired(unsigned long data) +{ + /* not an elegant detour, but there is no choice as the timer passes + * only one argument, and various sta_info are needed here, so init + * flow in sta_info_create gives the TID as data, while the timer_to_id + * array gives the sta through container_of */ + u8 *ptid = (u8 *)data; + u8 *timer_to_id = ptid - *ptid; + struct sta_info *sta = container_of(timer_to_id, struct sta_info, + timer_to_tid[0]); + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "tx session timer expired on tid %d\n", (u16)*ptid); +#endif + + ieee80211_stop_tx_ba_session(&sta->sta, *ptid); +} + int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, u16 timeout) { @@ -364,7 +440,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if ((tid >= STA_TID_NUM) || - !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) + !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) || + (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) return -EINVAL; #ifdef CONFIG_MAC80211_HT_DEBUG @@ -372,18 +449,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, pubsta->addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - /* - * The aggregation code is not prepared to handle - * anything but STA/AP due to the BSSID handling. - * IBSS could work in the code but isn't supported - * by drivers or the standard. - */ if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_AP) + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return -EINVAL; - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) { + if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA sessions blocked. " "Denying BA session request\n"); @@ -391,6 +464,27 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; } + /* + * 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a + * member of an IBSS, and has no other existing Block Ack agreement + * with the recipient STA, then the initiating STA shall transmit a + * Probe Request frame to the recipient STA and shall not transmit an + * ADDBA Request frame unless it receives a Probe Response frame + * from the recipient within dot11ADDBAFailureTimeout. + * + * The probe request mechanism for ADDBA is currently not implemented, + * but we only build up Block Ack session with HT STAs. This information + * is set when we receive a bss info from a probe response or a beacon. + */ + if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC && + !sta->sta.ht_cap.ht_supported) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - IBSS STA %pM" + "does not advertise HT support\n", pubsta->addr); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + return -EINVAL; + } + spin_lock_bh(&sta->lock); /* we have tried too many times, receiver does not want A-MPDU */ @@ -399,6 +493,24 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, goto err_unlock_sta; } + /* + * if we have tried more than HT_AGG_BURST_RETRIES times we + * will spread our requests in time to avoid stalling connection + * for too long + */ + if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_BURST_RETRIES && + time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + + HT_AGG_RETRIES_PERIOD)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - " + "waiting a grace period after %d failed requests " + "on tid %u\n", + sta->ampdu_mlme.addba_req_num[tid], tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + ret = -EBUSY; + goto err_unlock_sta; + } + tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { @@ -413,11 +525,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, /* prepare A-MPDU MLME for Tx aggregation */ tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); if (!tid_tx) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "allocate tx mlme to tid %d failed\n", - tid); -#endif ret = -ENOMEM; goto err_unlock_sta; } @@ -427,11 +534,16 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, tid_tx->timeout = timeout; - /* Tx timer */ + /* response timer */ tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired; tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid]; init_timer(&tid_tx->addba_resp_timer); + /* tx timer */ + tid_tx->session_timer.function = sta_tx_agg_session_timer_expired; + tid_tx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&tid_tx->session_timer); + /* assign a dialog token */ sta->ampdu_mlme.dialog_token_allocator++; tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator; @@ -451,38 +563,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, } EXPORT_SYMBOL(ieee80211_start_tx_ba_session); -/* - * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish later - */ -static void __acquires(agg_queue) -ieee80211_agg_splice_packets(struct ieee80211_local *local, - struct tid_ampdu_tx *tid_tx, u16 tid) -{ - int queue = ieee80211_ac_from_tid(tid); - unsigned long flags; - - ieee80211_stop_queue_agg(local, tid); - - if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" - " from the pending queue\n", tid)) - return; - - if (!skb_queue_empty(&tid_tx->pending)) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - /* copy over remaining packets */ - skb_queue_splice_tail_init(&tid_tx->pending, - &local->pending[queue]); - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - } -} - -static void __releases(agg_queue) -ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) -{ - ieee80211_wake_queue_agg(local, tid); -} - static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { @@ -536,7 +616,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) } mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, ra); + sta = sta_info_get_bss(sdata, ra); if (!sta) { mutex_unlock(&local->sta_mtx); #ifdef CONFIG_MAC80211_HT_DEBUG @@ -574,14 +654,9 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, struct ieee80211_ra_tid *ra_tid; struct sk_buff *skb = dev_alloc_skb(0); - if (unlikely(!skb)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping start BA session", sdata->name); -#endif + if (unlikely(!skb)) return; - } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; @@ -670,7 +745,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) mutex_lock(&local->sta_mtx); - sta = sta_info_get(sdata, ra); + sta = sta_info_get_bss(sdata, ra); if (!sta) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: %pM\n", ra); @@ -727,14 +802,9 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, struct ieee80211_ra_tid *ra_tid; struct sk_buff *skb = dev_alloc_skb(0); - if (unlikely(!skb)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping stop BA session", sdata->name); -#endif + if (unlikely(!skb)) return; - } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; @@ -772,23 +842,35 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; } - del_timer(&tid_tx->addba_resp_timer); + del_timer_sync(&tid_tx->addba_resp_timer); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid); #endif - if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) - == WLAN_STATUS_SUCCESS) { - /* - * IEEE 802.11-2007 7.3.1.14: - * In an ADDBA Response frame, when the Status Code field - * is set to 0, the Buffer Size subfield is set to a value - * of at least 1. - */ - if (!buf_size) - goto out; + /* + * addba_resp_timer may have fired before we got here, and + * caused WANT_STOP to be set. If the stop then was already + * processed further, STOPPING might be set. + */ + if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || + test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG + "got addBA resp for tid %d but we already gave up\n", + tid); +#endif + goto out; + } + /* + * IEEE 802.11-2007 7.3.1.14: + * In an ADDBA Response frame, when the Status Code field + * is set to 0, the Buffer Size subfield is set to a value + * of at least 1. + */ + if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) + == WLAN_STATUS_SUCCESS && buf_size) { if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { /* ignore duplicate response */ @@ -801,6 +883,11 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, ieee80211_agg_tx_operational(local, sta, tid); sta->ampdu_mlme.addba_req_num[tid] = 0; + + if (tid_tx->timeout) + mod_timer(&tid_tx->session_timer, + TU_TO_EXP_TIME(tid_tx->timeout)); + } else { ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR, true); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3d1b091d9b2e..e60df48fa4d4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <net/net_namespace.h> #include <linux/rcupdate.h> +#include <linux/if_ether.h> #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -62,7 +63,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (type == NL80211_IFTYPE_AP_VLAN && params && params->use_4addr == 0) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); else if (type == NL80211_IFTYPE_STATION && params && params->use_4addr >= 0) sdata->u.mgd.use_4addr = params->use_4addr; @@ -101,6 +102,16 @@ static int ieee80211_change_iface(struct wiphy *wiphy, return 0; } +static int ieee80211_set_noack_map(struct wiphy *wiphy, + struct net_device *dev, + u16 noack_map) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + sdata->noack_map = noack_map; + return 0; +} + static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx, bool pairwise, const u8 *mac_addr, struct key_params *params) @@ -343,7 +354,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_RX_BITRATE | STATION_INFO_RX_DROP_MISC | STATION_INFO_BSS_PARAM | - STATION_INFO_CONNECTED_TIME; + STATION_INFO_CONNECTED_TIME | + STATION_INFO_STA_FLAGS | + STATION_INFO_BEACON_LOSS_COUNT; do_posix_clock_monotonic_gettime(&uptime); sinfo->connected_time = uptime.tv_sec - sta->last_connected; @@ -356,6 +369,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->tx_retries = sta->tx_retry_count; sinfo->tx_failed = sta->tx_retry_failed; sinfo->rx_dropped_misc = sta->rx_dropped; + sinfo->beacon_loss_count = sta->beacon_loss_count; if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { @@ -403,6 +417,26 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period; sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; + + sinfo->sta_flags.set = 0; + sinfo->sta_flags.mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_TDLS_PEER); + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); + if (test_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_SHORT_PREAMBLE); + if (test_sta_flag(sta, WLAN_STA_WME)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_WME); + if (test_sta_flag(sta, WLAN_STA_MFP)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); + if (test_sta_flag(sta, WLAN_STA_AUTH)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); } @@ -455,6 +489,45 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, return ret; } +static void ieee80211_config_ap_ssid(struct ieee80211_sub_if_data *sdata, + struct beacon_parameters *params) +{ + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; + + bss_conf->ssid_len = params->ssid_len; + + if (params->ssid_len) + memcpy(bss_conf->ssid, params->ssid, params->ssid_len); + + bss_conf->hidden_ssid = + (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); +} + +static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, + u8 *resp, size_t resp_len) +{ + struct sk_buff *new, *old; + + if (!resp || !resp_len) + return -EINVAL; + + old = rtnl_dereference(sdata->u.ap.probe_resp); + + new = dev_alloc_skb(resp_len); + if (!new) + return -ENOMEM; + + memcpy(skb_put(new, resp_len), resp, resp_len); + + rcu_assign_pointer(sdata->u.ap.probe_resp, new); + synchronize_rcu(); + + if (old) + dev_kfree_skb(old); + + return 0; +} + /* * This handles both adding a beacon and setting new beacon info */ @@ -465,6 +538,7 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, int new_head_len, new_tail_len; int size; int err = -EINVAL; + u32 changed = 0; old = rtnl_dereference(sdata->u.ap.beacon); @@ -548,8 +622,17 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, kfree(old); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | - BSS_CHANGED_BEACON); + err = ieee80211_set_probe_resp(sdata, params->probe_resp, + params->probe_resp_len); + if (!err) + changed |= BSS_CHANGED_AP_PROBE_RESP; + + ieee80211_config_ap_ssid(sdata, params); + changed |= BSS_CHANGED_BEACON_ENABLED | + BSS_CHANGED_BEACON | + BSS_CHANGED_SSID; + + ieee80211_bss_info_change_notify(sdata, changed); return 0; } @@ -558,6 +641,8 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata; struct beacon_data *old; + struct ieee80211_sub_if_data *vlan; + int ret; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -565,7 +650,24 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, if (old) return -EALREADY; - return ieee80211_config_beacon(sdata, params); + ret = ieee80211_config_beacon(sdata, params); + if (ret) + return ret; + + /* + * Apply control port protocol, this allows us to + * not encrypt dynamic WEP control frames. + */ + sdata->control_port_protocol = params->crypto.control_port_ethertype; + sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt; + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { + vlan->control_port_protocol = + params->crypto.control_port_ethertype; + vlan->control_port_no_encrypt = + params->crypto.control_port_no_encrypt; + } + + return 0; } static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, @@ -594,7 +696,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) if (!old) return -ENOENT; - rcu_assign_pointer(sdata->u.ap.beacon, NULL); + RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); synchronize_rcu(); kfree(old); @@ -646,11 +748,11 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) netif_rx_ni(skb); } -static void sta_apply_parameters(struct ieee80211_local *local, - struct sta_info *sta, - struct station_parameters *params) +static int sta_apply_parameters(struct ieee80211_local *local, + struct sta_info *sta, + struct station_parameters *params) { - unsigned long flags; + int ret = 0; u32 rates; int i, j; struct ieee80211_supported_band *sband; @@ -659,43 +761,97 @@ static void sta_apply_parameters(struct ieee80211_local *local, sband = local->hw.wiphy->bands[local->oper_channel->band]; - spin_lock_irqsave(&sta->flaglock, flags); mask = params->sta_flags_mask; set = params->sta_flags_set; + /* + * In mesh mode, we can clear AUTHENTICATED flag but must + * also make ASSOCIATED follow appropriately for the driver + * API. See also below, after AUTHORIZED changes. + */ + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { + /* cfg80211 should not allow this in non-mesh modes */ + if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) + return -EINVAL; + + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state_checked(sta, + IEEE80211_STA_AUTH); + if (ret) + return ret; + ret = sta_info_move_state_checked(sta, + IEEE80211_STA_ASSOC); + if (ret) + return ret; + } + } + if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - sta->flags &= ~WLAN_STA_AUTHORIZED; if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) - sta->flags |= WLAN_STA_AUTHORIZED; + ret = sta_info_move_state_checked(sta, + IEEE80211_STA_AUTHORIZED); + else + ret = sta_info_move_state_checked(sta, + IEEE80211_STA_ASSOC); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { + /* cfg80211 should not allow this in non-mesh modes */ + if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) + return -EINVAL; + + if (!(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && + test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state_checked(sta, + IEEE80211_STA_AUTH); + if (ret) + return ret; + ret = sta_info_move_state_checked(sta, + IEEE80211_STA_NONE); + if (ret) + return ret; + } } + if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { - sta->flags &= ~WLAN_STA_SHORT_PREAMBLE; if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) - sta->flags |= WLAN_STA_SHORT_PREAMBLE; + set_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); + else + clear_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); } if (mask & BIT(NL80211_STA_FLAG_WME)) { - sta->flags &= ~WLAN_STA_WME; - sta->sta.wme = false; if (set & BIT(NL80211_STA_FLAG_WME)) { - sta->flags |= WLAN_STA_WME; + set_sta_flag(sta, WLAN_STA_WME); sta->sta.wme = true; + } else { + clear_sta_flag(sta, WLAN_STA_WME); + sta->sta.wme = false; } } if (mask & BIT(NL80211_STA_FLAG_MFP)) { - sta->flags &= ~WLAN_STA_MFP; if (set & BIT(NL80211_STA_FLAG_MFP)) - sta->flags |= WLAN_STA_MFP; + set_sta_flag(sta, WLAN_STA_MFP); + else + clear_sta_flag(sta, WLAN_STA_MFP); } - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - sta->flags &= ~WLAN_STA_AUTH; - if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) - sta->flags |= WLAN_STA_AUTH; + if (mask & BIT(NL80211_STA_FLAG_TDLS_PEER)) { + if (set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + set_sta_flag(sta, WLAN_STA_TDLS_PEER); + else + clear_sta_flag(sta, WLAN_STA_TDLS_PEER); + } + + if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) { + sta->sta.uapsd_queues = params->uapsd_queues; + sta->sta.max_sp = params->max_sp; } - spin_unlock_irqrestore(&sta->flaglock, flags); /* * cfg80211 validates this (1-2007) and allows setting the AID @@ -728,7 +884,7 @@ static void sta_apply_parameters(struct ieee80211_local *local, } if (params->ht_capa) - ieee80211_ht_cap_ie_to_sta_ht_cap(sband, + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, params->ht_capa, &sta->sta.ht_cap); @@ -756,6 +912,8 @@ static void sta_apply_parameters(struct ieee80211_local *local, } #endif } + + return 0; } static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, @@ -786,11 +944,21 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; - sta->flags = WLAN_STA_AUTH | WLAN_STA_ASSOC; + sta_info_move_state(sta, IEEE80211_STA_AUTH); + sta_info_move_state(sta, IEEE80211_STA_ASSOC); - sta_apply_parameters(local, sta, params); + err = sta_apply_parameters(local, sta, params); + if (err) { + sta_info_free(local, sta); + return err; + } - rate_control_rate_init(sta); + /* + * for TDLS, rate control should be initialized only when supported + * rates are known. + */ + if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + rate_control_rate_init(sta); layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || sdata->vif.type == NL80211_IFTYPE_AP; @@ -834,26 +1002,34 @@ static int ieee80211_change_station(struct wiphy *wiphy, struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mac); if (!sta) { - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); return -ENOENT; } + /* in station mode, supported rates are only valid with TDLS */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + params->supported_rates && + !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + mutex_unlock(&local->sta_mtx); + return -EINVAL; + } + if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN && vlansdata->vif.type != NL80211_IFTYPE_AP) { - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); return -EINVAL; } if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) { - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); return -EBUSY; } @@ -866,7 +1042,10 @@ static int ieee80211_change_station(struct wiphy *wiphy, sta_apply_parameters(local, sta, params); - rcu_read_unlock(); + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates) + rate_control_rate_init(sta); + + mutex_unlock(&local->sta_mtx); if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) @@ -918,7 +1097,7 @@ static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, if (dst) return mesh_path_del(dst, sdata); - mesh_path_flush(sdata); + mesh_path_flush_by_iface(sdata); return 0; } @@ -1058,6 +1237,8 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, { u8 *new_ie; const u8 *old_ie; + struct ieee80211_sub_if_data *sdata = container_of(ifmsh, + struct ieee80211_sub_if_data, u.mesh); /* allocate information elements */ new_ie = NULL; @@ -1084,6 +1265,10 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, if (setup->is_secure) ifmsh->security |= IEEE80211_MESH_SEC_SECURED; + /* mcast rate setting in Mesh Node */ + memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, + sizeof(setup->mcast_rate)); + return 0; } @@ -1129,6 +1314,9 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask)) conf->dot11MeshHWMPpreqMinInterval = nconf->dot11MeshHWMPpreqMinInterval; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, mask)) + conf->dot11MeshHWMPperrMinInterval = + nconf->dot11MeshHWMPperrMinInterval; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, mask)) conf->dot11MeshHWMPnetDiameterTraversalTime = @@ -1137,6 +1325,22 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, conf->dot11MeshHWMPRootMode = nconf->dot11MeshHWMPRootMode; ieee80211_mesh_root_setup(ifmsh); } + if (_chg_mesh_attr(NL80211_MESHCONF_GATE_ANNOUNCEMENTS, mask)) { + /* our current gate announcement implementation rides on root + * announcements, so require this ifmsh to also be a root node + * */ + if (nconf->dot11MeshGateAnnouncementProtocol && + !conf->dot11MeshHWMPRootMode) { + conf->dot11MeshHWMPRootMode = 1; + ieee80211_mesh_root_setup(ifmsh); + } + conf->dot11MeshGateAnnouncementProtocol = + nconf->dot11MeshGateAnnouncementProtocol; + } + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) { + conf->dot11MeshHWMPRannInterval = + nconf->dot11MeshHWMPRannInterval; + } return 0; } @@ -1235,9 +1439,11 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } static int ieee80211_set_txq_params(struct wiphy *wiphy, + struct net_device *dev, struct ieee80211_txq_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_tx_queue_params p; if (!local->ops->conf_tx) @@ -1258,8 +1464,8 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, if (params->queue >= local->hw.queues) return -EINVAL; - local->tx_conf[params->queue] = p; - if (drv_conf_tx(local, params->queue, &p)) { + sdata->tx_conf[params->queue] = p; + if (drv_conf_tx(local, sdata, params->queue, &p)) { wiphy_debug(local->hw.wiphy, "failed to set TX queue parameters for queue %d\n", params->queue); @@ -1311,7 +1517,7 @@ static int ieee80211_set_channel(struct wiphy *wiphy, (old_oper_type != local->_oper_channel_type)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - if ((sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) && + if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR && old_vif_oper_type != sdata->vif.bss_conf.channel_type) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT); @@ -1821,7 +2027,7 @@ ieee80211_offchan_tx_done(struct ieee80211_work *wk, struct sk_buff *skb) * so in that case userspace will have to deal with it. */ - if (wk->offchan_tx.wait && wk->offchan_tx.frame) + if (wk->offchan_tx.wait && !wk->offchan_tx.status) cfg80211_mgmt_tx_status(wk->sdata->dev, (unsigned long) wk->offchan_tx.frame, wk->ie, wk->ie_len, false, GFP_KERNEL); @@ -1833,7 +2039,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, u64 *cookie) + const u8 *buf, size_t len, bool no_cck, + bool dont_wait_for_ack, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -1841,10 +2048,15 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; struct ieee80211_work *wk; const struct ieee80211_mgmt *mgmt = (void *)buf; - u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | - IEEE80211_TX_CTL_REQ_TX_STATUS; + u32 flags; bool is_offchan = false; + if (dont_wait_for_ack) + flags = IEEE80211_TX_CTL_NO_ACK; + else + flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + /* Check that we are on the requested channel for transmission */ if (chan != local->tmp_channel && chan != local->oper_channel) @@ -1860,6 +2072,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, flags |= IEEE80211_TX_CTL_TX_OFFCHAN; } + if (no_cck) + flags |= IEEE80211_TX_CTL_NO_CCK_RATE; + if (is_offchan && !offchan) return -EBUSY; @@ -1898,33 +2113,6 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, *cookie = (unsigned long) skb; - if (is_offchan && local->ops->offchannel_tx) { - int ret; - - IEEE80211_SKB_CB(skb)->band = chan->band; - - mutex_lock(&local->mtx); - - if (local->hw_offchan_tx_cookie) { - mutex_unlock(&local->mtx); - return -EBUSY; - } - - /* TODO: bitrate control, TX processing? */ - ret = drv_offchannel_tx(local, skb, chan, channel_type, wait); - - if (ret == 0) - local->hw_offchan_tx_cookie = *cookie; - mutex_unlock(&local->mtx); - - /* - * Allow driver to return 1 to indicate it wants to have the - * frame transmitted with a remain_on_channel + regular TX. - */ - if (ret != 1) - return ret; - } - if (is_offchan && local->ops->remain_on_channel) { unsigned int duration; int ret; @@ -2011,18 +2199,6 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, mutex_lock(&local->mtx); - if (local->ops->offchannel_tx_cancel_wait && - local->hw_offchan_tx_cookie == cookie) { - ret = drv_offchannel_tx_cancel_wait(local); - - if (!ret) - local->hw_offchan_tx_cookie = 0; - - mutex_unlock(&local->mtx); - - return ret; - } - if (local->ops->cancel_remain_on_channel) { cookie ^= 2; ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); @@ -2123,6 +2299,399 @@ static int ieee80211_set_rekey_data(struct wiphy *wiphy, return 0; } +static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb) +{ + u8 *pos = (void *)skb_put(skb, 7); + + *pos++ = WLAN_EID_EXT_CAPABILITY; + *pos++ = 5; /* len */ + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; +} + +static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u16 capab; + + capab = 0; + if (local->oper_channel->band != IEEE80211_BAND_2GHZ) + return capab; + + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; + + return capab; +} + +static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, + u8 *peer, u8 *bssid) +{ + struct ieee80211_tdls_lnkie *lnkid; + + lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie)); + + lnkid->ie_type = WLAN_EID_LINK_ID; + lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; + + memcpy(lnkid->bssid, bssid, ETH_ALEN); + memcpy(lnkid->init_sta, src_addr, ETH_ALEN); + memcpy(lnkid->resp_sta, peer, ETH_ALEN); +} + +static int +ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, + u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_tdls_data *tf; + + tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); + + memcpy(tf->da, peer, ETH_ALEN); + memcpy(tf->sa, sdata->vif.addr, ETH_ALEN); + tf->ether_type = cpu_to_be16(ETH_P_TDLS); + tf->payload_type = WLAN_TDLS_SNAP_RFTYPE; + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_REQUEST; + + skb_put(skb, sizeof(tf->u.setup_req)); + tf->u.setup_req.dialog_token = dialog_token; + tf->u.setup_req.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(&sdata->vif, skb); + ieee80211_add_ext_srates_ie(&sdata->vif, skb); + ieee80211_tdls_add_ext_capab(skb); + break; + case WLAN_TDLS_SETUP_RESPONSE: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_RESPONSE; + + skb_put(skb, sizeof(tf->u.setup_resp)); + tf->u.setup_resp.status_code = cpu_to_le16(status_code); + tf->u.setup_resp.dialog_token = dialog_token; + tf->u.setup_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(&sdata->vif, skb); + ieee80211_add_ext_srates_ie(&sdata->vif, skb); + ieee80211_tdls_add_ext_capab(skb); + break; + case WLAN_TDLS_SETUP_CONFIRM: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_CONFIRM; + + skb_put(skb, sizeof(tf->u.setup_cfm)); + tf->u.setup_cfm.status_code = cpu_to_le16(status_code); + tf->u.setup_cfm.dialog_token = dialog_token; + break; + case WLAN_TDLS_TEARDOWN: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_TEARDOWN; + + skb_put(skb, sizeof(tf->u.teardown)); + tf->u.teardown.reason_code = cpu_to_le16(status_code); + break; + case WLAN_TDLS_DISCOVERY_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST; + + skb_put(skb, sizeof(tf->u.discover_req)); + tf->u.discover_req.dialog_token = dialog_token; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, + u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_mgmt *mgmt; + + mgmt = (void *)skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, peer, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + switch (action_code) { + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); + mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; + mgmt->u.action.u.tdls_discover_resp.action_code = + WLAN_PUB_ACTION_TDLS_DISCOVER_RES; + mgmt->u.action.u.tdls_discover_resp.dialog_token = + dialog_token; + mgmt->u.action.u.tdls_discover_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(&sdata->vif, skb); + ieee80211_add_ext_srates_ie(&sdata->vif, skb); + ieee80211_tdls_add_ext_capab(skb); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, + u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, const u8 *extra_ies, + size_t extra_ies_len) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_info *info; + struct sk_buff *skb = NULL; + bool send_direct; + int ret; + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + /* make sure we are in managed mode, and associated */ + if (sdata->vif.type != NL80211_IFTYPE_STATION || + !sdata->u.mgd.associated) + return -EINVAL; + +#ifdef CONFIG_MAC80211_VERBOSE_TDLS_DEBUG + printk(KERN_DEBUG "TDLS mgmt action %d peer %pM\n", action_code, peer); +#endif + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + max(sizeof(struct ieee80211_mgmt), + sizeof(struct ieee80211_tdls_data)) + + 50 + /* supported rates */ + 7 + /* ext capab */ + extra_ies_len + + sizeof(struct ieee80211_tdls_lnkie)); + if (!skb) + return -ENOMEM; + + info = IEEE80211_SKB_CB(skb); + skb_reserve(skb, local->hw.extra_tx_headroom); + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer, + action_code, dialog_token, + status_code, skb); + send_direct = false; + break; + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code, + dialog_token, status_code, + skb); + send_direct = true; + break; + default: + ret = -ENOTSUPP; + break; + } + + if (ret < 0) + goto fail; + + if (extra_ies_len) + memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len); + + /* the TDLS link IE is always added last */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + /* we are the initiator */ + ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer, + sdata->u.mgd.bssid); + break; + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + /* we are the responder */ + ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr, + sdata->u.mgd.bssid); + break; + default: + ret = -ENOTSUPP; + goto fail; + } + + if (send_direct) { + ieee80211_tx_skb(sdata, skb); + return 0; + } + + /* + * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise + * we should default to AC_VI. + */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + skb_set_queue_mapping(skb, IEEE80211_AC_BK); + skb->priority = 2; + break; + default: + skb_set_queue_mapping(skb, IEEE80211_AC_VI); + skb->priority = 5; + break; + } + + /* disable bottom halves when entering the Tx path */ + local_bh_disable(); + ret = ieee80211_subif_start_xmit(skb, dev); + local_bh_enable(); + + return ret; + +fail: + dev_kfree_skb(skb); + return ret; +} + +static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, + u8 *peer, enum nl80211_tdls_operation oper) +{ + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EINVAL; + +#ifdef CONFIG_MAC80211_VERBOSE_TDLS_DEBUG + printk(KERN_DEBUG "TDLS oper %d peer %pM\n", oper, peer); +#endif + + switch (oper) { + case NL80211_TDLS_ENABLE_LINK: + rcu_read_lock(); + sta = sta_info_get(sdata, peer); + if (!sta) { + rcu_read_unlock(); + return -ENOLINK; + } + + set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); + rcu_read_unlock(); + break; + case NL80211_TDLS_DISABLE_LINK: + return sta_info_destroy_addr(sdata, peer); + case NL80211_TDLS_TEARDOWN: + case NL80211_TDLS_SETUP: + case NL80211_TDLS_DISCOVERY_REQ: + /* We don't support in-driver setup/teardown/discovery */ + return -ENOTSUPP; + default: + return -ENOTSUPP; + } + + return 0; +} + +static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u64 *cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_qos_hdr *nullfunc; + struct sk_buff *skb; + int size = sizeof(*nullfunc); + __le16 fc; + bool qos; + struct ieee80211_tx_info *info; + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, peer); + if (sta) { + qos = test_sta_flag(sta, WLAN_STA_WME); + rcu_read_unlock(); + } else { + rcu_read_unlock(); + return -ENOLINK; + } + + if (qos) { + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_QOS_NULLFUNC | + IEEE80211_FCTL_FROMDS); + } else { + size -= 2; + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_FROMDS); + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); + if (!skb) + return -ENOMEM; + + skb->dev = dev; + + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (void *) skb_put(skb, size); + nullfunc->frame_control = fc; + nullfunc->duration_id = 0; + memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); + memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + nullfunc->seq_ctrl = 0; + + info = IEEE80211_SKB_CB(skb); + + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_NL80211_FRAME_TX; + + skb_set_queue_mapping(skb, IEEE80211_AC_VO); + skb->priority = 7; + if (qos) + nullfunc->qos_ctrl = cpu_to_le16(7); + + local_bh_disable(); + ieee80211_xmit(sdata, skb); + local_bh_enable(); + + *cookie = (unsigned long) skb; + return 0; +} + +static struct ieee80211_channel * +ieee80211_wiphy_get_channel(struct wiphy *wiphy) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + return local->oper_channel; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -2186,4 +2755,9 @@ struct cfg80211_ops mac80211_config_ops = { .set_ringparam = ieee80211_set_ringparam, .get_ringparam = ieee80211_get_ringparam, .set_rekey_data = ieee80211_set_rekey_data, + .tdls_oper = ieee80211_tdls_oper, + .tdls_mgmt = ieee80211_tdls_mgmt, + .probe_client = ieee80211_probe_client, + .get_channel = ieee80211_wiphy_get_channel, + .set_noack_map = ieee80211_set_noack_map, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 186e02f7cc32..90baea53e7c5 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -78,57 +78,6 @@ DEBUGFS_READONLY_FILE(wep_iv, "%#08x", DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); -static ssize_t tsf_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - u64 tsf; - - tsf = drv_get_tsf(local); - - return mac80211_format_buffer(user_buf, count, ppos, "0x%016llx\n", - (unsigned long long) tsf); -} - -static ssize_t tsf_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - unsigned long long tsf; - char buf[100]; - size_t len; - - len = min(count, sizeof(buf) - 1); - if (copy_from_user(buf, user_buf, len)) - return -EFAULT; - buf[len] = '\0'; - - if (strncmp(buf, "reset", 5) == 0) { - if (local->ops->reset_tsf) { - drv_reset_tsf(local); - wiphy_info(local->hw.wiphy, "debugfs reset TSF\n"); - } - } else { - tsf = simple_strtoul(buf, NULL, 0); - if (local->ops->set_tsf) { - drv_set_tsf(local, tsf); - wiphy_info(local->hw.wiphy, - "debugfs set TSF to %#018llx\n", tsf); - - } - } - - return count; -} - -static const struct file_operations tsf_ops = { - .read = tsf_read, - .write = tsf_write, - .open = mac80211_open_file_generic, - .llseek = default_llseek, -}; - static ssize_t reset_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { @@ -148,40 +97,6 @@ static const struct file_operations reset_ops = { .llseek = noop_llseek, }; -static ssize_t noack_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - - return mac80211_format_buffer(user_buf, count, ppos, "%d\n", - local->wifi_wme_noack_test); -} - -static ssize_t noack_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - char buf[10]; - size_t len; - - len = min(count, sizeof(buf) - 1); - if (copy_from_user(buf, user_buf, len)) - return -EFAULT; - buf[len] = '\0'; - - local->wifi_wme_noack_test = !!simple_strtoul(buf, NULL, 0); - - return count; -} - -static const struct file_operations noack_ops = { - .read = noack_read, - .write = noack_write, - .open = mac80211_open_file_generic, - .llseek = default_llseek, -}; - static ssize_t uapsd_queues_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { @@ -195,20 +110,12 @@ static ssize_t uapsd_queues_write(struct file *file, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; - unsigned long val; - char buf[10]; - size_t len; + u8 val; int ret; - len = min(count, sizeof(buf) - 1); - if (copy_from_user(buf, user_buf, len)) - return -EFAULT; - buf[len] = '\0'; - - ret = strict_strtoul(buf, 0, &val); - + ret = kstrtou8_from_user(user_buf, count, 0, &val); if (ret) - return -EINVAL; + return ret; if (val & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) return -ERANGE; @@ -249,7 +156,7 @@ static ssize_t uapsd_max_sp_len_write(struct file *file, return -EFAULT; buf[len] = '\0'; - ret = strict_strtoul(buf, 0, &val); + ret = kstrtoul(buf, 0, &val); if (ret) return -EINVAL; @@ -305,6 +212,9 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, char *buf = kzalloc(mxln, GFP_KERNEL); int sf = 0; /* how many written so far */ + if (!buf) + return 0; + sf += snprintf(buf, mxln - sf, "0x%x\n", local->hw.flags); if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) sf += snprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n"); @@ -355,6 +265,8 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n"); if (local->hw.flags & IEEE80211_HW_AP_LINK_PS) sf += snprintf(buf + sf, mxln - sf, "AP_LINK_PS\n"); + if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW) + sf += snprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n"); rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); kfree(buf); @@ -450,10 +362,8 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(frequency); DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); - DEBUGFS_ADD(tsf); DEBUGFS_ADD(queues); DEBUGFS_ADD_MODE(reset, 0200); - DEBUGFS_ADD(noack); DEBUGFS_ADD(uapsd_queues); DEBUGFS_ADD(uapsd_max_sp_len); DEBUGFS_ADD(channel_type); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9ea7c0d0103f..176c08ffb13c 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -21,6 +21,7 @@ #include "rate.h" #include "debugfs.h" #include "debugfs_netdev.h" +#include "driver-ops.h" static ssize_t ieee80211_if_read( struct ieee80211_sub_if_data *sdata, @@ -320,6 +321,7 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( __IEEE80211_IF_FILE_W(tkip_mic_test); /* AP attributes */ +IEEE80211_IF_FILE(num_sta_authorized, u.ap.num_sta_authorized, ATOMIC); IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); @@ -331,6 +333,46 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast( } __IEEE80211_IF_FILE(num_buffered_multicast, NULL); +/* IBSS attributes */ +static ssize_t ieee80211_if_fmt_tsf( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + u64 tsf; + + tsf = drv_get_tsf(local, (struct ieee80211_sub_if_data *)sdata); + + return scnprintf(buf, buflen, "0x%016llx\n", (unsigned long long) tsf); +} + +static ssize_t ieee80211_if_parse_tsf( + struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + unsigned long long tsf; + int ret; + + if (strncmp(buf, "reset", 5) == 0) { + if (local->ops->reset_tsf) { + drv_reset_tsf(local, sdata); + wiphy_info(local->hw.wiphy, "debugfs reset TSF\n"); + } + } else { + ret = kstrtoull(buf, 10, &tsf); + if (ret < 0) + return -EINVAL; + if (local->ops->set_tsf) { + drv_set_tsf(local, sdata, tsf); + wiphy_info(local->hw.wiphy, + "debugfs set TSF to %#018llx\n", tsf); + } + } + + return buflen; +} +__IEEE80211_IF_FILE_W(tsf); + + /* WDS attributes */ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); @@ -340,6 +382,8 @@ IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC); IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC); IEEE80211_IF_FILE(fwded_frames, u.mesh.mshstats.fwded_frames, DEC); IEEE80211_IF_FILE(dropped_frames_ttl, u.mesh.mshstats.dropped_frames_ttl, DEC); +IEEE80211_IF_FILE(dropped_frames_congestion, + u.mesh.mshstats.dropped_frames_congestion, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, u.mesh.mshstats.dropped_frames_no_route, DEC); IEEE80211_IF_FILE(estab_plinks, u.mesh.mshstats.estab_plinks, ATOMIC); @@ -362,6 +406,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPactivePathTimeout, u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC); IEEE80211_IF_FILE(dot11MeshHWMPpreqMinInterval, u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPperrMinInterval, + u.mesh.mshcfg.dot11MeshHWMPperrMinInterval, DEC); IEEE80211_IF_FILE(dot11MeshHWMPnetDiameterTraversalTime, u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC); IEEE80211_IF_FILE(dot11MeshHWMPmaxPREQretries, @@ -372,6 +418,10 @@ IEEE80211_IF_FILE(min_discovery_timeout, u.mesh.mshcfg.min_discovery_timeout, DEC); IEEE80211_IF_FILE(dot11MeshHWMPRootMode, u.mesh.mshcfg.dot11MeshHWMPRootMode, DEC); +IEEE80211_IF_FILE(dot11MeshGateAnnouncementProtocol, + u.mesh.mshcfg.dot11MeshGateAnnouncementProtocol, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPRannInterval, + u.mesh.mshcfg.dot11MeshHWMPRannInterval, DEC); #endif @@ -409,12 +459,18 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); + DEBUGFS_ADD(num_sta_authorized); DEBUGFS_ADD(num_sta_ps); DEBUGFS_ADD(dtim_count); DEBUGFS_ADD(num_buffered_multicast); DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } +static void add_ibss_files(struct ieee80211_sub_if_data *sdata) +{ + DEBUGFS_ADD_MODE(tsf, 0600); +} + static void add_wds_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); @@ -459,6 +515,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) MESHSTATS_ADD(fwded_frames); MESHSTATS_ADD(dropped_frames_ttl); MESHSTATS_ADD(dropped_frames_no_route); + MESHSTATS_ADD(dropped_frames_congestion); MESHSTATS_ADD(estab_plinks); #undef MESHSTATS_ADD } @@ -481,11 +538,14 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshMaxPeerLinks); MESHPARAMS_ADD(dot11MeshHWMPactivePathTimeout); MESHPARAMS_ADD(dot11MeshHWMPpreqMinInterval); + MESHPARAMS_ADD(dot11MeshHWMPperrMinInterval); MESHPARAMS_ADD(dot11MeshHWMPnetDiameterTraversalTime); MESHPARAMS_ADD(dot11MeshHWMPmaxPREQretries); MESHPARAMS_ADD(path_refresh_time); MESHPARAMS_ADD(min_discovery_timeout); - + MESHPARAMS_ADD(dot11MeshHWMPRootMode); + MESHPARAMS_ADD(dot11MeshHWMPRannInterval); + MESHPARAMS_ADD(dot11MeshGateAnnouncementProtocol); #undef MESHPARAMS_ADD } #endif @@ -506,7 +566,7 @@ static void add_files(struct ieee80211_sub_if_data *sdata) add_sta_files(sdata); break; case NL80211_IFTYPE_ADHOC: - /* XXX */ + add_ibss_files(sdata); break; case NL80211_IFTYPE_AP: add_ap_files(sdata); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a01d2137fddc..2406b3e7393f 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -56,19 +56,22 @@ STA_FILE(last_signal, last_signal, D); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[100]; + char buf[121]; struct sta_info *sta = file->private_data; - u32 staflags = get_sta_flags(sta); - int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s", - staflags & WLAN_STA_AUTH ? "AUTH\n" : "", - staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "", - staflags & WLAN_STA_PS_STA ? "PS (sta)\n" : "", - staflags & WLAN_STA_PS_DRIVER ? "PS (driver)\n" : "", - staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", - staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", - staflags & WLAN_STA_WME ? "WME\n" : "", - staflags & WLAN_STA_WDS ? "WDS\n" : "", - staflags & WLAN_STA_MFP ? "MFP\n" : ""); + +#define TEST(flg) \ + test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : "" + + int res = scnprintf(buf, sizeof(buf), + "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + TEST(AUTH), TEST(ASSOC), TEST(PS_STA), + TEST(PS_DRIVER), TEST(AUTHORIZED), + TEST(SHORT_PREAMBLE), + TEST(WME), TEST(WDS), TEST(CLEAR_PS_FILT), + TEST(MFP), TEST(BLOCK_BA), TEST(PSPOLL), + TEST(UAPSD), TEST(SP), TEST(TDLS_PEER), + TEST(TDLS_PEER_AUTH)); +#undef TEST return simple_read_from_buffer(userbuf, count, ppos, buf, res); } STA_OPS(flags); @@ -78,8 +81,14 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file, size_t count, loff_t *ppos) { struct sta_info *sta = file->private_data; - return mac80211_format_buffer(userbuf, count, ppos, "%u\n", - skb_queue_len(&sta->ps_tx_buf)); + char buf[17*IEEE80211_NUM_ACS], *p = buf; + int ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + p += scnprintf(p, sizeof(buf)+buf-p, "AC%d: %d\n", ac, + skb_queue_len(&sta->ps_tx_buf[ac]) + + skb_queue_len(&sta->tx_filtered[ac])); + return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } STA_OPS(num_ps_buf_frames); @@ -265,9 +274,9 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, PRINT_HT_CAP((htc->cap & BIT(10)), "HT Delayed Block Ack"); - PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: " - "3839 bytes"); PRINT_HT_CAP(!(htc->cap & BIT(11)), "Max AMSDU length: " + "3839 bytes"); + PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: " "7935 bytes"); /* diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 1425380983f7..e8960ae39861 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -5,11 +5,34 @@ #include "ieee80211_i.h" #include "driver-trace.h" +static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) +{ + WARN_ON(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER)); +} + +static inline struct ieee80211_sub_if_data * +get_bss_sdata(struct ieee80211_sub_if_data *sdata) +{ + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, + u.ap); + + return sdata; +} + static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb) { local->ops->tx(&local->hw, skb); } +static inline void drv_tx_frags(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct sk_buff_head *skbs) +{ + local->ops->tx_frags(&local->hw, vif, sta, skbs); +} + static inline int drv_start(struct ieee80211_local *local) { int ret; @@ -69,15 +92,23 @@ static inline int drv_resume(struct ieee80211_local *local) #endif static inline int drv_add_interface(struct ieee80211_local *local, - struct ieee80211_vif *vif) + struct ieee80211_sub_if_data *sdata) { int ret; might_sleep(); - trace_drv_add_interface(local, vif_to_sdata(vif)); - ret = local->ops->add_interface(&local->hw, vif); + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MONITOR)) + return -EINVAL; + + trace_drv_add_interface(local, sdata); + ret = local->ops->add_interface(&local->hw, &sdata->vif); trace_drv_return_int(local, ret); + + if (ret == 0) + sdata->flags |= IEEE80211_SDATA_IN_DRIVER; + return ret; } @@ -89,6 +120,8 @@ static inline int drv_change_interface(struct ieee80211_local *local, might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_change_interface(local, sdata, type, p2p); ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p); trace_drv_return_int(local, ret); @@ -96,12 +129,15 @@ static inline int drv_change_interface(struct ieee80211_local *local, } static inline void drv_remove_interface(struct ieee80211_local *local, - struct ieee80211_vif *vif) + struct ieee80211_sub_if_data *sdata) { might_sleep(); - trace_drv_remove_interface(local, vif_to_sdata(vif)); - local->ops->remove_interface(&local->hw, vif); + check_sdata_in_driver(sdata); + + trace_drv_remove_interface(local, sdata); + local->ops->remove_interface(&local->hw, &sdata->vif); + sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; trace_drv_return_void(local); } @@ -124,6 +160,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, { might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_bss_info_changed(local, sdata, info, changed); if (local->ops->bss_info_changed) local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed); @@ -139,6 +177,8 @@ static inline int drv_tx_sync(struct ieee80211_local *local, might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_tx_sync(local, sdata, bssid, type); if (local->ops->tx_sync) ret = local->ops->tx_sync(&local->hw, &sdata->vif, @@ -154,6 +194,8 @@ static inline void drv_finish_tx_sync(struct ieee80211_local *local, { might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_finish_tx_sync(local, sdata, bssid, type); if (local->ops->finish_tx_sync) local->ops->finish_tx_sync(&local->hw, &sdata->vif, @@ -211,6 +253,8 @@ static inline int drv_set_key(struct ieee80211_local *local, might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_set_key(local, cmd, sdata, sta, key); ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); trace_drv_return_int(local, ret); @@ -228,6 +272,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, if (sta) ista = &sta->sta; + check_sdata_in_driver(sdata); + trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); if (local->ops->update_tkip_key) local->ops->update_tkip_key(&local->hw, &sdata->vif, conf, @@ -243,6 +289,8 @@ static inline int drv_hw_scan(struct ieee80211_local *local, might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_hw_scan(local, sdata); ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); trace_drv_return_int(local, ret); @@ -254,6 +302,8 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local, { might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_cancel_hw_scan(local, sdata); local->ops->cancel_hw_scan(&local->hw, &sdata->vif); trace_drv_return_void(local); @@ -269,6 +319,8 @@ drv_sched_scan_start(struct ieee80211_local *local, might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_sched_scan_start(local, sdata); ret = local->ops->sched_scan_start(&local->hw, &sdata->vif, req, ies); @@ -281,6 +333,8 @@ static inline void drv_sched_scan_stop(struct ieee80211_local *local, { might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_sched_scan_stop(local, sdata); local->ops->sched_scan_stop(&local->hw, &sdata->vif); trace_drv_return_void(local); @@ -377,6 +431,9 @@ static inline void drv_sta_notify(struct ieee80211_local *local, enum sta_notify_cmd cmd, struct ieee80211_sta *sta) { + sdata = get_bss_sdata(sdata); + check_sdata_in_driver(sdata); + trace_drv_sta_notify(local, sdata, cmd, sta); if (local->ops->sta_notify) local->ops->sta_notify(&local->hw, &sdata->vif, cmd, sta); @@ -391,6 +448,9 @@ static inline int drv_sta_add(struct ieee80211_local *local, might_sleep(); + sdata = get_bss_sdata(sdata); + check_sdata_in_driver(sdata); + trace_drv_sta_add(local, sdata, sta); if (local->ops->sta_add) ret = local->ops->sta_add(&local->hw, &sdata->vif, sta); @@ -406,6 +466,9 @@ static inline void drv_sta_remove(struct ieee80211_local *local, { might_sleep(); + sdata = get_bss_sdata(sdata); + check_sdata_in_driver(sdata); + trace_drv_sta_remove(local, sdata, sta); if (local->ops->sta_remove) local->ops->sta_remove(&local->hw, &sdata->vif, sta); @@ -413,50 +476,64 @@ static inline void drv_sta_remove(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, +static inline int drv_conf_tx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, u16 queue, const struct ieee80211_tx_queue_params *params) { int ret = -EOPNOTSUPP; might_sleep(); - trace_drv_conf_tx(local, queue, params); + check_sdata_in_driver(sdata); + + trace_drv_conf_tx(local, sdata, queue, params); if (local->ops->conf_tx) - ret = local->ops->conf_tx(&local->hw, queue, params); + ret = local->ops->conf_tx(&local->hw, &sdata->vif, + queue, params); trace_drv_return_int(local, ret); return ret; } -static inline u64 drv_get_tsf(struct ieee80211_local *local) +static inline u64 drv_get_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { u64 ret = -1ULL; might_sleep(); - trace_drv_get_tsf(local); + check_sdata_in_driver(sdata); + + trace_drv_get_tsf(local, sdata); if (local->ops->get_tsf) - ret = local->ops->get_tsf(&local->hw); + ret = local->ops->get_tsf(&local->hw, &sdata->vif); trace_drv_return_u64(local, ret); return ret; } -static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf) +static inline void drv_set_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u64 tsf) { might_sleep(); - trace_drv_set_tsf(local, tsf); + check_sdata_in_driver(sdata); + + trace_drv_set_tsf(local, sdata, tsf); if (local->ops->set_tsf) - local->ops->set_tsf(&local->hw, tsf); + local->ops->set_tsf(&local->hw, &sdata->vif, tsf); trace_drv_return_void(local); } -static inline void drv_reset_tsf(struct ieee80211_local *local) +static inline void drv_reset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { might_sleep(); - trace_drv_reset_tsf(local); + check_sdata_in_driver(sdata); + + trace_drv_reset_tsf(local, sdata); if (local->ops->reset_tsf) - local->ops->reset_tsf(&local->hw); + local->ops->reset_tsf(&local->hw, &sdata->vif); trace_drv_return_void(local); } @@ -483,6 +560,9 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, might_sleep(); + sdata = get_bss_sdata(sdata); + check_sdata_in_driver(sdata); + trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size); if (local->ops->ampdu_action) @@ -590,37 +670,6 @@ static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local) return ret; } -static inline int drv_offchannel_tx(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - unsigned int wait) -{ - int ret; - - might_sleep(); - - trace_drv_offchannel_tx(local, skb, chan, channel_type, wait); - ret = local->ops->offchannel_tx(&local->hw, skb, chan, - channel_type, wait); - trace_drv_return_int(local, ret); - - return ret; -} - -static inline int drv_offchannel_tx_cancel_wait(struct ieee80211_local *local) -{ - int ret; - - might_sleep(); - - trace_drv_offchannel_tx_cancel_wait(local); - ret = local->ops->offchannel_tx_cancel_wait(&local->hw); - trace_drv_return_int(local, ret); - - return ret; -} - static inline int drv_set_ringparam(struct ieee80211_local *local, u32 tx, u32 rx) { @@ -669,6 +718,8 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local, might_sleep(); + check_sdata_in_driver(sdata); + trace_drv_set_bitrate_mask(local, sdata, mask); if (local->ops->set_bitrate_mask) ret = local->ops->set_bitrate_mask(&local->hw, @@ -682,6 +733,8 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_gtk_rekey_data *data) { + check_sdata_in_driver(sdata); + trace_drv_set_rekey_data(local, sdata, data); if (local->ops->set_rekey_data) local->ops->set_rekey_data(&local->hw, &sdata->vif, data); @@ -696,4 +749,34 @@ static inline void drv_rssi_callback(struct ieee80211_local *local, local->ops->rssi_callback(&local->hw, event); trace_drv_return_void(local); } + +static inline void +drv_release_buffered_frames(struct ieee80211_local *local, + struct sta_info *sta, u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data) +{ + trace_drv_release_buffered_frames(local, &sta->sta, tids, num_frames, + reason, more_data); + if (local->ops->release_buffered_frames) + local->ops->release_buffered_frames(&local->hw, &sta->sta, tids, + num_frames, reason, + more_data); + trace_drv_return_void(local); +} + +static inline void +drv_allow_buffered_frames(struct ieee80211_local *local, + struct sta_info *sta, u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data) +{ + trace_drv_allow_buffered_frames(local, &sta->sta, tids, num_frames, + reason, more_data); + if (local->ops->allow_buffered_frames) + local->ops->allow_buffered_frames(&local->hw, &sta->sta, + tids, num_frames, reason, + more_data); + trace_drv_return_void(local); +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index f47b00dc7afd..6e9df8fd8fb8 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -5,17 +5,6 @@ #include <net/mac80211.h> #include "ieee80211_i.h" -#if !defined(CONFIG_MAC80211_DRIVER_API_TRACER) || defined(__CHECKER__) -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, ...) \ -static inline void trace_ ## name(proto) {} -#undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(...) -#undef DEFINE_EVENT -#define DEFINE_EVENT(evt_class, name, proto, ...) \ -static inline void trace_ ## name(proto) {} -#endif - #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211 @@ -697,64 +686,76 @@ TRACE_EVENT(drv_sta_remove, ); TRACE_EVENT(drv_conf_tx, - TP_PROTO(struct ieee80211_local *local, u16 queue, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u16 queue, const struct ieee80211_tx_queue_params *params), - TP_ARGS(local, queue, params), + TP_ARGS(local, sdata, queue, params), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(u16, queue) __field(u16, txop) __field(u16, cw_min) __field(u16, cw_max) __field(u8, aifs) + __field(bool, uapsd) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->queue = queue; __entry->txop = params->txop; __entry->cw_max = params->cw_max; __entry->cw_min = params->cw_min; __entry->aifs = params->aifs; + __entry->uapsd = params->uapsd; ), TP_printk( - LOCAL_PR_FMT " queue:%d", - LOCAL_PR_ARG, __entry->queue + LOCAL_PR_FMT VIF_PR_FMT " queue:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->queue ) ); -DEFINE_EVENT(local_only_evt, drv_get_tsf, - TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local) +DEFINE_EVENT(local_sdata_evt, drv_get_tsf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) ); TRACE_EVENT(drv_set_tsf, - TP_PROTO(struct ieee80211_local *local, u64 tsf), + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u64 tsf), - TP_ARGS(local, tsf), + TP_ARGS(local, sdata, tsf), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(u64, tsf) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->tsf = tsf; ), TP_printk( - LOCAL_PR_FMT " tsf:%llu", - LOCAL_PR_ARG, (unsigned long long)__entry->tsf + LOCAL_PR_FMT VIF_PR_FMT " tsf:%llu", + LOCAL_PR_ARG, VIF_PR_ARG, (unsigned long long)__entry->tsf ) ); -DEFINE_EVENT(local_only_evt, drv_reset_tsf, - TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local) +DEFINE_EVENT(local_sdata_evt, drv_reset_tsf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) ); DEFINE_EVENT(local_only_evt, drv_tx_last_beacon, @@ -1117,6 +1118,61 @@ TRACE_EVENT(drv_rssi_callback, ) ); +DECLARE_EVENT_CLASS(release_evt, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data), + + TP_ARGS(local, sta, tids, num_frames, reason, more_data), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u16, tids) + __field(int, num_frames) + __field(int, reason) + __field(bool, more_data) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->tids = tids; + __entry->num_frames = num_frames; + __entry->reason = reason; + __entry->more_data = more_data; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT + " TIDs:0x%.4x frames:%d reason:%d more:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->tids, __entry->num_frames, + __entry->reason, __entry->more_data + ) +); + +DEFINE_EVENT(release_evt, drv_release_buffered_frames, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data), + + TP_ARGS(local, sta, tids, num_frames, reason, more_data) +); + +DEFINE_EVENT(release_evt, drv_allow_buffered_frames, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data), + + TP_ARGS(local, sta, tids, num_frames, reason, more_data) +); + /* * Tracing for API calls that drivers call. */ @@ -1431,6 +1487,28 @@ TRACE_EVENT(api_enable_rssi_reports, ) ); +TRACE_EVENT(api_eosp, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta), + + TP_ARGS(local, sta), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, STA_PR_FMT + ) +); + /* * Tracing for internal functions * (which may also be called in response to driver calls) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 7cfc286946c0..f25fff7607d8 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -14,11 +14,89 @@ */ #include <linux/ieee80211.h> +#include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "rate.h" -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, +bool ieee80111_cfg_override_disables_ht40(struct ieee80211_sub_if_data *sdata) +{ + const __le16 flg = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40); + if ((sdata->u.mgd.ht_capa_mask.cap_info & flg) && + !(sdata->u.mgd.ht_capa.cap_info & flg)) + return true; + return false; +} + +static void __check_htcap_disable(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_ht_cap *ht_cap, + u16 flag) +{ + __le16 le_flag = cpu_to_le16(flag); + if (sdata->u.mgd.ht_capa_mask.cap_info & le_flag) { + if (!(sdata->u.mgd.ht_capa.cap_info & le_flag)) + ht_cap->cap &= ~flag; + } +} + +void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_ht_cap *ht_cap) +{ + u8 *scaps = (u8 *)(&sdata->u.mgd.ht_capa.mcs.rx_mask); + u8 *smask = (u8 *)(&sdata->u.mgd.ht_capa_mask.mcs.rx_mask); + int i; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) { + /* AP interfaces call this code when adding new stations, + * so just silently ignore non station interfaces. + */ + return; + } + + /* NOTE: If you add more over-rides here, update register_hw + * ht_capa_mod_msk logic in main.c as well. + * And, if this method can ever change ht_cap.ht_supported, fix + * the check in ieee80211_add_ht_ie. + */ + + /* check for HT over-rides, MCS rates first. */ + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { + u8 m = smask[i]; + ht_cap->mcs.rx_mask[i] &= ~m; /* turn off all masked bits */ + /* Add back rates that are supported */ + ht_cap->mcs.rx_mask[i] |= (m & scaps[i]); + } + + /* Force removal of HT-40 capabilities? */ + __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40); + __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_40); + + /* Allow user to disable the max-AMSDU bit. */ + __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU); + + /* Allow user to decrease AMPDU factor */ + if (sdata->u.mgd.ht_capa_mask.ampdu_params_info & + IEEE80211_HT_AMPDU_PARM_FACTOR) { + u8 n = sdata->u.mgd.ht_capa.ampdu_params_info + & IEEE80211_HT_AMPDU_PARM_FACTOR; + if (n < ht_cap->ampdu_factor) + ht_cap->ampdu_factor = n; + } + + /* Allow the user to increase AMPDU density. */ + if (sdata->u.mgd.ht_capa_mask.ampdu_params_info & + IEEE80211_HT_AMPDU_PARM_DENSITY) { + u8 n = (sdata->u.mgd.ht_capa.ampdu_params_info & + IEEE80211_HT_AMPDU_PARM_DENSITY) + >> IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT; + if (n > ht_cap->ampdu_density) + ht_cap->ampdu_density = n; + } +} + + +void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, struct ieee80211_ht_cap *ht_cap_ie, struct ieee80211_sta_ht_cap *ht_cap) { @@ -102,6 +180,12 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, /* handle MCS rate 32 too */ if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) ht_cap->mcs.rx_mask[32/8] |= 1; + + /* + * If user has specified capability over-rides, take care + * of that here. + */ + ieee80211_apply_htcap_overrides(sdata, ht_cap); } void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx) @@ -130,7 +214,7 @@ void ieee80211_ba_session_work(struct work_struct *work) * down by the code that set the flag, so this * need not run. */ - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) + if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) return; mutex_lock(&sta->ampdu_mlme.mtx); @@ -186,12 +270,8 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, u16 params; skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); - - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer " - "for delba frame\n", sdata->name); + if (!skb) return; - } skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); @@ -199,10 +279,13 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT) memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); else if (sdata->vif.type == NL80211_IFTYPE_STATION) memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); @@ -217,7 +300,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.delba.params = cpu_to_le16(params); mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, tid); } void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 56c24cabf26d..b3d76b756cd5 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -77,14 +77,15 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *bss; u32 bss_change; u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; + enum nl80211_channel_type channel_type; lockdep_assert_held(&ifibss->mtx); /* Reset own TSF to allow time synchronization work. */ - drv_reset_tsf(local); + drv_reset_tsf(local, sdata); skb = ifibss->skb; - rcu_assign_pointer(ifibss->presp, NULL); + RCU_INIT_POINTER(ifibss->presp, NULL); synchronize_rcu(); skb->data = skb->head; skb->len = 0; @@ -97,6 +98,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, /* if merging, indicate to driver that we leave the old IBSS */ if (sdata->vif.bss_conf.ibss_joined) { sdata->vif.bss_conf.ibss_joined = false; + netif_carrier_off(sdata->dev); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS); } @@ -104,8 +106,16 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - local->oper_channel = chan; - WARN_ON(!ieee80211_set_channel_type(local, sdata, NL80211_CHAN_NO_HT)); + channel_type = ifibss->channel_type; + if (channel_type > NL80211_CHAN_HT20 && + !cfg80211_can_beacon_sec_chan(local->hw.wiphy, chan, channel_type)) + channel_type = NL80211_CHAN_HT20; + if (!ieee80211_set_channel_type(local, sdata, channel_type)) { + /* can only fail due to HT40+/- mismatch */ + channel_type = NL80211_CHAN_HT20; + WARN_ON(!ieee80211_set_channel_type(local, sdata, + NL80211_CHAN_HT20)); + } ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); sband = local->hw.wiphy->bands[chan->band]; @@ -171,6 +181,19 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, memcpy(skb_put(skb, ifibss->ie_len), ifibss->ie, ifibss->ie_len); + /* add HT capability and information IEs */ + if (channel_type && sband->ht_cap.ht_supported) { + pos = skb_put(skb, 4 + + sizeof(struct ieee80211_ht_cap) + + sizeof(struct ieee80211_ht_info)); + pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, + sband->ht_cap.cap); + pos = ieee80211_ie_build_ht_info(pos, + &sband->ht_cap, + chan, + channel_type); + } + if (local->hw.queues >= 4) { pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; @@ -194,6 +217,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss_change |= BSS_CHANGED_BEACON; bss_change |= BSS_CHANGED_BEACON_ENABLED; bss_change |= BSS_CHANGED_BASIC_RATES; + bss_change |= BSS_CHANGED_HT; bss_change |= BSS_CHANGED_IBSS; sdata->vif.bss_conf.ibss_joined = true; ieee80211_bss_info_change_notify(sdata, bss_change); @@ -207,6 +231,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss = cfg80211_inform_bss_frame(local->hw.wiphy, local->hw.conf.channel, mgmt, skb->len, 0, GFP_KERNEL); cfg80211_put_bss(bss); + netif_carrier_on(sdata->dev); cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL); } @@ -250,6 +275,80 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, cbss->tsf); } +static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta) + __acquires(RCU) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + u8 addr[ETH_ALEN]; + + memcpy(addr, sta->sta.addr, ETH_ALEN); + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + wiphy_debug(sdata->local->hw.wiphy, + "Adding new IBSS station %pM (dev=%s)\n", + addr, sdata->name); +#endif + + sta_info_move_state(sta, IEEE80211_STA_AUTH); + sta_info_move_state(sta, IEEE80211_STA_ASSOC); + sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); + + rate_control_rate_init(sta); + + /* If it fails, maybe we raced another insertion? */ + if (sta_info_insert_rcu(sta)) + return sta_info_get(sdata, addr); + return sta; +} + +static struct sta_info * +ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, + const u8 *bssid, const u8 *addr, + u32 supp_rates) + __acquires(RCU) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + int band = local->hw.conf.channel->band; + + /* + * XXX: Consider removing the least recently used entry and + * allow new one to be added. + */ + if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: No room for a new IBSS STA entry %pM\n", + sdata->name, addr); + rcu_read_lock(); + return NULL; + } + + if (ifibss->state == IEEE80211_IBSS_MLME_SEARCH) { + rcu_read_lock(); + return NULL; + } + + if (compare_ether_addr(bssid, sdata->u.ibss.bssid)) { + rcu_read_lock(); + return NULL; + } + + sta = sta_info_alloc(sdata, addr, GFP_KERNEL); + if (!sta) { + rcu_read_lock(); + return NULL; + } + + sta->last_rx = jiffies; + + /* make sure mandatory rates are always added */ + sta->sta.supp_rates[band] = supp_rates | + ieee80211_mandatory_rates(local, band); + + return ieee80211_ibss_finish_sta(sta); +} + static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, @@ -266,6 +365,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, u64 beacon_timestamp, rx_timestamp; u32 supp_rates = 0; enum ieee80211_band band = rx_status->band; + struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; + bool rates_updated = false; if (elems->ds_params && elems->ds_params_len == 1) freq = ieee80211_channel_to_frequency(elems->ds_params[0], @@ -305,16 +406,50 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, prev_rates, sta->sta.supp_rates[band]); #endif - rate_control_rate_init(sta); + rates_updated = true; } - } else + } else { + rcu_read_unlock(); sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, - mgmt->sa, supp_rates, - GFP_ATOMIC); + mgmt->sa, supp_rates); + } } if (sta && elems->wmm_info) - set_sta_flags(sta, WLAN_STA_WME); + set_sta_flag(sta, WLAN_STA_WME); + + if (sta && elems->ht_info_elem && elems->ht_cap_elem && + sdata->u.ibss.channel_type != NL80211_CHAN_NO_HT) { + /* we both use HT */ + struct ieee80211_sta_ht_cap sta_ht_cap_new; + enum nl80211_channel_type channel_type = + ieee80211_ht_info_to_channel_type( + elems->ht_info_elem); + + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + elems->ht_cap_elem, + &sta_ht_cap_new); + + /* + * fall back to HT20 if we don't use or use + * the other extension channel + */ + if ((channel_type == NL80211_CHAN_HT40MINUS || + channel_type == NL80211_CHAN_HT40PLUS) && + channel_type != sdata->u.ibss.channel_type) + sta_ht_cap_new.cap &= + ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + if (memcmp(&sta->sta.ht_cap, &sta_ht_cap_new, + sizeof(sta_ht_cap_new))) { + memcpy(&sta->sta.ht_cap, &sta_ht_cap_new, + sizeof(sta_ht_cap_new)); + rates_updated = true; + } + } + + if (sta && rates_updated) + rate_control_rate_init(sta); rcu_read_unlock(); } @@ -382,7 +517,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * second best option: get current TSF * (will return -1 if not supported) */ - rx_timestamp = drv_get_tsf(local); + rx_timestamp = drv_get_tsf(local, sdata); } #ifdef CONFIG_MAC80211_IBSS_DEBUG @@ -404,21 +539,17 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, ieee80211_sta_join_ibss(sdata, bss); supp_rates = ieee80211_sta_get_rates(local, elems, band); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, - supp_rates, GFP_KERNEL); + supp_rates); + rcu_read_unlock(); } put_bss: ieee80211_rx_bss_put(local, bss); } -/* - * Add a new IBSS station, will also be called by the RX code when, - * in IBSS mode, receiving a frame from a yet-unknown station, hence - * must be callable in atomic context. - */ -struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid,u8 *addr, u32 supp_rates, - gfp_t gfp) +void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, + const u8 *bssid, const u8 *addr, + u32 supp_rates) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; @@ -433,37 +564,29 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, if (net_ratelimit()) printk(KERN_DEBUG "%s: No room for a new IBSS STA entry %pM\n", sdata->name, addr); - return NULL; + return; } if (ifibss->state == IEEE80211_IBSS_MLME_SEARCH) - return NULL; + return; if (compare_ether_addr(bssid, sdata->u.ibss.bssid)) - return NULL; - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Adding new IBSS station %pM (dev=%s)\n", - addr, sdata->name); -#endif + return; - sta = sta_info_alloc(sdata, addr, gfp); + sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); if (!sta) - return NULL; + return; sta->last_rx = jiffies; - set_sta_flags(sta, WLAN_STA_AUTHORIZED); /* make sure mandatory rates are always added */ sta->sta.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(local, band); - rate_control_rate_init(sta); - - /* If it fails, maybe we raced another insertion? */ - if (sta_info_insert(sta)) - return sta_info_get(sdata, addr); - return sta; + spin_lock(&ifibss->incomplete_lock); + list_add(&sta->list, &ifibss->incomplete_stations); + spin_unlock(&ifibss->incomplete_lock); + ieee80211_queue_work(&local->hw, &sdata->work); } static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) @@ -802,6 +925,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct sta_info *sta; mutex_lock(&ifibss->mtx); @@ -813,6 +937,19 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) if (!ifibss->ssid_len) goto out; + spin_lock_bh(&ifibss->incomplete_lock); + while (!list_empty(&ifibss->incomplete_stations)) { + sta = list_first_entry(&ifibss->incomplete_stations, + struct sta_info, list); + list_del(&sta->list); + spin_unlock_bh(&ifibss->incomplete_lock); + + ieee80211_ibss_finish_sta(sta); + rcu_read_unlock(); + spin_lock_bh(&ifibss->incomplete_lock); + } + spin_unlock_bh(&ifibss->incomplete_lock); + switch (ifibss->state) { case IEEE80211_IBSS_MLME_SEARCH: ieee80211_sta_find_ibss(sdata); @@ -871,6 +1008,8 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); mutex_init(&ifibss->mtx); + INIT_LIST_HEAD(&ifibss->incomplete_stations); + spin_lock_init(&ifibss->incomplete_lock); } /* scan finished notification */ @@ -894,12 +1033,18 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params) { struct sk_buff *skb; + u32 changed = 0; skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + - 36 /* bitrates */ + - 34 /* SSID */ + - 3 /* DS params */ + - 4 /* IBSS params */ + + sizeof(struct ieee80211_hdr_3addr) + + 12 /* struct ieee80211_mgmt.u.beacon */ + + 2 + IEEE80211_MAX_SSID_LEN /* max SSID */ + + 2 + 8 /* max Supported Rates */ + + 3 /* max DS params */ + + 4 /* IBSS params */ + + 2 + (IEEE80211_MAX_SUPP_RATES - 8) + + 2 + sizeof(struct ieee80211_ht_cap) + + 2 + sizeof(struct ieee80211_ht_info) + params->ie_len); if (!skb) return -ENOMEM; @@ -920,13 +1065,18 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->u.ibss.channel = params->channel; + sdata->u.ibss.channel_type = params->channel_type; sdata->u.ibss.fixed_channel = params->channel_fixed; /* fix ourselves to that channel now already */ if (params->channel_fixed) { sdata->local->oper_channel = params->channel; - WARN_ON(!ieee80211_set_channel_type(sdata->local, sdata, - NL80211_CHAN_NO_HT)); + if (!ieee80211_set_channel_type(sdata->local, sdata, + params->channel_type)) { + mutex_unlock(&sdata->u.ibss.mtx); + kfree_skb(skb); + return -EINVAL; + } } if (params->ie) { @@ -949,6 +1099,23 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(sdata->local); mutex_unlock(&sdata->local->mtx); + /* + * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is + * reserved, but an HT STA shall protect HT transmissions as though + * the HT Protection field were set to non-HT mixed mode. + * + * In an IBSS, the RIFS Mode field of the HT Operation element is + * also reserved, but an HT STA shall operate as though this field + * were set to 1. + */ + + sdata->vif.bss_conf.ht_operation_mode |= + IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED + | IEEE80211_HT_PARAM_RIFS_MODE; + + changed |= BSS_CHANGED_HT; + ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); return 0; @@ -962,6 +1129,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) struct cfg80211_bss *cbss; u16 capability; int active_ibss; + struct sta_info *sta; mutex_lock(&sdata->u.ibss.mtx); @@ -991,11 +1159,25 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) sta_info_flush(sdata->local, sdata); + spin_lock_bh(&ifibss->incomplete_lock); + while (!list_empty(&ifibss->incomplete_stations)) { + sta = list_first_entry(&ifibss->incomplete_stations, + struct sta_info, list); + list_del(&sta->list); + spin_unlock_bh(&ifibss->incomplete_lock); + + sta_info_free(local, sta); + spin_lock_bh(&ifibss->incomplete_lock); + } + spin_unlock_bh(&ifibss->incomplete_lock); + + netif_carrier_off(sdata->dev); + /* remove beacon */ kfree(sdata->u.ibss.ie); skb = rcu_dereference_protected(sdata->u.ibss.presp, lockdep_is_held(&sdata->u.ibss.mtx)); - rcu_assign_pointer(sdata->u.ibss.presp, NULL); + RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 400c09bea639..2f0642d9e154 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -24,6 +24,7 @@ #include <linux/spinlock.h> #include <linux/etherdevice.h> #include <linux/leds.h> +#include <linux/idr.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> #include <net/mac80211.h> @@ -136,12 +137,12 @@ typedef unsigned __bitwise__ ieee80211_tx_result; #define TX_DROP ((__force ieee80211_tx_result) 1u) #define TX_QUEUED ((__force ieee80211_tx_result) 2u) -#define IEEE80211_TX_FRAGMENTED BIT(0) #define IEEE80211_TX_UNICAST BIT(1) #define IEEE80211_TX_PS_BUFFERED BIT(2) struct ieee80211_tx_data { struct sk_buff *skb; + struct sk_buff_head skbs; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct sta_info *sta; @@ -149,7 +150,6 @@ struct ieee80211_tx_data { struct ieee80211_channel *channel; - u16 ethertype; unsigned int flags; }; @@ -186,12 +186,15 @@ enum ieee80211_packet_rx_flags { * enum ieee80211_rx_flags - RX data flags * * @IEEE80211_RX_CMNTR: received on cooked monitor already + * @IEEE80211_RX_BEACON_REPORTED: This frame was already reported + * to cfg80211_report_obss_beacon(). * * These flags are used across handling multiple interfaces * for a single frame. */ enum ieee80211_rx_flags { IEEE80211_RX_CMNTR = BIT(0), + IEEE80211_RX_BEACON_REPORTED = BIT(1), }; struct ieee80211_rx_data { @@ -230,6 +233,7 @@ struct beacon_data { struct ieee80211_if_ap { struct beacon_data __rcu *beacon; + struct sk_buff __rcu *probe_resp; struct list_head vlans; @@ -239,6 +243,7 @@ struct ieee80211_if_ap { u8 tim[sizeof(unsigned long) * BITS_TO_LONGS(IEEE80211_MAX_AID + 1)]; struct sk_buff_head ps_bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ + atomic_t num_sta_authorized; /* number of authorized stations */ int dtim_count; bool dtim_bc_mc; }; @@ -261,6 +266,7 @@ struct mesh_stats { __u32 fwded_frames; /* Mesh total forwarded frames */ __u32 dropped_frames_ttl; /* Not transmitted since mesh_ttl == 0*/ __u32 dropped_frames_no_route; /* Not transmitted, no route found */ + __u32 dropped_frames_congestion;/* Not forwarded due to congestion */ atomic_t estab_plinks; }; @@ -345,6 +351,7 @@ struct ieee80211_work { struct { struct sk_buff *frame; u32 wait; + bool status; } offchan_tx; }; @@ -389,6 +396,7 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ + bool broken_ap; /* AP is broken -- turn off powersave */ enum ieee80211_smps_mode req_smps, /* requested smps mode */ ap_smps, /* smps mode AP thinks we're in */ driver_smps_mode; /* smps mode request */ @@ -442,6 +450,9 @@ struct ieee80211_if_managed { */ int rssi_min_thold, rssi_max_thold; int last_ave_beacon_signal; + + struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */ + struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */ }; struct ieee80211_if_ibss { @@ -464,12 +475,16 @@ struct ieee80211_if_ibss { u8 ssid_len, ie_len; u8 *ie; struct ieee80211_channel *channel; + enum nl80211_channel_type channel_type; unsigned long ibss_join_req; /* probe response/beacon for IBSS */ struct sk_buff __rcu *presp; struct sk_buff *skb; + spinlock_t incomplete_lock; + struct list_head incomplete_stations; + enum { IEEE80211_IBSS_MLME_SEARCH, IEEE80211_IBSS_MLME_JOINED, @@ -504,7 +519,9 @@ struct ieee80211_if_mesh { atomic_t mpaths; /* Timestamp of last SN update */ unsigned long last_sn_update; - /* Timestamp of last SN sent */ + /* Time when it's ok to send next PERR */ + unsigned long next_perr; + /* Timestamp of last PREQ sent */ unsigned long last_preq; struct mesh_rmc *rmc; spinlock_t mesh_preq_queue_lock; @@ -514,6 +531,7 @@ struct ieee80211_if_mesh { struct mesh_config mshcfg; u32 mesh_seqnum; bool accepting_plinks; + int num_gates; const u8 *ie; u8 ie_len; enum { @@ -541,6 +559,7 @@ struct ieee80211_if_mesh { * associated stations and deliver multicast frames both * back to wireless media and to the local net stack. * @IEEE80211_SDATA_DISCONNECT_RESUME: Disconnect after resume. + * @IEEE80211_SDATA_IN_DRIVER: indicates interface was added to driver */ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_ALLMULTI = BIT(0), @@ -548,6 +567,7 @@ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_OPERATING_GMODE = BIT(2), IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4), + IEEE80211_SDATA_IN_DRIVER = BIT(5), }; /** @@ -598,6 +618,9 @@ struct ieee80211_sub_if_data { struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; + /* TID bitmap for NoAck policy */ + u16 noack_map; + struct ieee80211_key __rcu *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; struct ieee80211_key __rcu *default_unicast_key; struct ieee80211_key __rcu *default_multicast_key; @@ -607,6 +630,8 @@ struct ieee80211_sub_if_data { __be16 control_port_protocol; bool control_port_no_encrypt; + struct ieee80211_tx_queue_params tx_conf[IEEE80211_MAX_QUEUES]; + struct work_struct work; struct sk_buff_head skb_queue; @@ -660,6 +685,11 @@ enum sdata_queue_type { enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, + IEEE80211_EOSP_MSG = 3, +}; + +struct skb_eosp_msg_data { + u8 sta[ETH_ALEN], iface[ETH_ALEN]; }; enum queue_stop_reason { @@ -669,6 +699,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_AGGREGATION, IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, + IEEE80211_QUEUE_STOP_REASON_CHTYPE_CHANGE, }; #ifdef CONFIG_MAC80211_LEDS @@ -712,17 +743,16 @@ enum { * operating channel * @SCAN_SET_CHANNEL: Set the next channel to be scanned * @SCAN_SEND_PROBE: Send probe requests and wait for probe responses - * @SCAN_LEAVE_OPER_CHANNEL: Leave the operating channel, notify the AP - * about us leaving the channel and stop all associated STA interfaces - * @SCAN_ENTER_OPER_CHANNEL: Enter the operating channel again, notify the - * AP about us being back and restart all associated STA interfaces + * @SCAN_SUSPEND: Suspend the scan and go back to operating channel to + * send out data + * @SCAN_RESUME: Resume the scan and scan the next channel */ enum mac80211_scan_state { SCAN_DECISION, SCAN_SET_CHANNEL, SCAN_SEND_PROBE, - SCAN_LEAVE_OPER_CHANNEL, - SCAN_ENTER_OPER_CHANNEL, + SCAN_SUSPEND, + SCAN_RESUME, }; struct ieee80211_local { @@ -748,7 +778,6 @@ struct ieee80211_local { struct workqueue_struct *workqueue; unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; - struct ieee80211_tx_queue_params tx_conf[IEEE80211_MAX_QUEUES]; /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */ spinlock_t queue_stop_reason_lock; @@ -826,18 +855,15 @@ struct ieee80211_local { /* Station data */ /* - * The mutex only protects the list and counter, - * reads are done in RCU. - * Additionally, the lock protects the hash table, - * the pending list and each BSS's TIM bitmap. + * The mutex only protects the list, hash table and + * counter, reads are done with RCU. */ struct mutex sta_mtx; - spinlock_t sta_lock; + spinlock_t tim_lock; unsigned long num_sta; - struct list_head sta_list, sta_pending_list; + struct list_head sta_list; struct sta_info __rcu *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; - struct work_struct sta_finish_work; int sta_generation; struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; @@ -942,7 +968,6 @@ struct ieee80211_local { int total_ps_buffered; /* total number of all buffered unicast and * multicast packets for power saving stations */ - int wifi_wme_noack_test; unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ /* @@ -1002,7 +1027,9 @@ struct ieee80211_local { unsigned int hw_roc_duration; u32 hw_roc_cookie; bool hw_roc_for_tx; - unsigned long hw_offchan_tx_cookie; + + struct idr ack_status_frames; + spinlock_t ack_status_lock; /* dummy netdev for use w/ NAPI */ struct net_device napi_dev; @@ -1046,7 +1073,7 @@ struct ieee802_11_elems { struct ieee80211_ht_info *ht_info_elem; struct ieee80211_meshconf_ie *mesh_config; u8 *mesh_id; - u8 *peer_link; + u8 *peering; u8 *preq; u8 *prep; u8 *perr; @@ -1054,7 +1081,7 @@ struct ieee802_11_elems { u8 *ch_switch_elem; u8 *country_elem; u8 *pwr_constr_elem; - u8 *quiet_elem; /* first quite element */ + u8 *quiet_elem; /* first quite element */ u8 *timeout_int; /* length of them, respectively */ @@ -1073,7 +1100,7 @@ struct ieee802_11_elems { u8 wmm_info_len; u8 wmm_param_len; u8 mesh_id_len; - u8 peer_link_len; + u8 peering_len; u8 preq_len; u8 prep_len; u8 perr_len; @@ -1145,9 +1172,8 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); -struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 *addr, u32 supp_rates, - gfp_t gfp); +void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, + const u8 *bssid, const u8 *addr, u32 supp_rates); int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params); int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); @@ -1195,13 +1221,9 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); void ieee80211_sched_scan_stopped_work(struct work_struct *work); /* off-channel helpers */ -bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local); -void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, - bool tell_ap); void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, bool offchannel_ps_enable); void ieee80211_offchannel_return(struct ieee80211_local *local, - bool enable_beaconing, bool offchannel_ps_disable); void ieee80211_hw_roc_setup(struct ieee80211_local *local); @@ -1233,23 +1255,14 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); -/* - * radiotap header for status frames - */ -struct ieee80211_tx_status_rtap_hdr { - struct ieee80211_radiotap_header hdr; - u8 rate; - u8 padding_for_rate; - __le16 tx_flags; - u8 data_retries; -} __packed; - - /* HT */ -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, +bool ieee80111_cfg_override_disables_ht40(struct ieee80211_sub_if_data *sdata); +void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_ht_cap *ht_cap); +void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, struct ieee80211_ht_cap *ht_cap_ie, struct ieee80211_sta_ht_cap *ht_cap); -void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code); @@ -1333,7 +1346,17 @@ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int ke struct ieee80211_hdr *hdr, const u8 *tsc, gfp_t gfp); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata); -void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); + +void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, int tid); +static void inline ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + /* Send all internal mgmt frames on VO. Accordingly set TID to 7. */ + ieee80211_tx_skb_tid(sdata, skb, 7); +} + void ieee802_11_parse_elems(u8 *start, size_t len, struct ieee802_11_elems *elems); u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, @@ -1364,11 +1387,11 @@ void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason); void ieee80211_add_pending_skb(struct ieee80211_local *local, struct sk_buff *skb); -int ieee80211_add_pending_skbs(struct ieee80211_local *local, - struct sk_buff_head *skbs); -int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, - struct sk_buff_head *skbs, - void (*fn)(void *data), void *data); +void ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs); +void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, + struct sk_buff_head *skbs, + void (*fn)(void *data), void *data); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, @@ -1386,7 +1409,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed); + u32 ratemask, bool directed, bool no_cck); void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, const size_t supp_rates_len, @@ -1401,6 +1424,12 @@ void ieee80211_recalc_smps(struct ieee80211_local *local); size_t ieee80211_ie_split(const u8 *ies, size_t ielen, const u8 *ids, int n_ids, size_t offset); size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset); +u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, + u16 cap); +u8 *ieee80211_ie_build_ht_info(u8 *pos, + struct ieee80211_sta_ht_cap *ht_cap, + struct ieee80211_channel *channel, + enum nl80211_channel_type channel_type); /* internal work items */ void ieee80211_work_init(struct ieee80211_local *local); @@ -1429,6 +1458,8 @@ ieee80211_get_channel_mode(struct ieee80211_local *local, bool ieee80211_set_channel_type(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum nl80211_channel_type chantype); +enum nl80211_channel_type +ieee80211_ht_info_to_channel_type(struct ieee80211_ht_info *ht_info); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 556e7e6ddf0a..e47768cb8cb3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -188,11 +188,22 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) return -ENOLINK; break; - case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_AP_VLAN: { + struct ieee80211_sub_if_data *master; + if (!sdata->bss) return -ENOLINK; + list_add(&sdata->u.vlan.list, &sdata->bss->vlans); + + master = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + sdata->control_port_protocol = + master->control_port_protocol; + sdata->control_port_no_encrypt = + master->control_port_no_encrypt; break; + } case NL80211_IFTYPE_AP: sdata->bss = &sdata->u.ap; break; @@ -265,7 +276,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) break; default: if (coming_up) { - res = drv_add_interface(local, &sdata->vif); + res = drv_add_interface(local, sdata); if (res) goto err_stop; } @@ -282,10 +293,18 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) netif_carrier_off(dev); else netif_carrier_on(dev); + + /* + * set default queue parameters so drivers don't + * need to initialise the hardware if the hardware + * doesn't start up with sane defaults + */ + ieee80211_set_wmm_default(sdata); } set_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -299,8 +318,9 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) goto err_del_interface; } - /* no locking required since STA is not live yet */ - sta->flags |= WLAN_STA_AUTHORIZED; + sta_info_move_state(sta, IEEE80211_STA_AUTH); + sta_info_move_state(sta, IEEE80211_STA_ASSOC); + sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); res = sta_info_insert(sta); if (res) { @@ -329,15 +349,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) if (coming_up) local->open_count++; - if (hw_reconf_flags) { + if (hw_reconf_flags) ieee80211_hw_config(local, hw_reconf_flags); - /* - * set default queue parameters so drivers don't - * need to initialise the hardware if the hardware - * doesn't start up with sane defaults - */ - ieee80211_set_wmm_default(sdata); - } ieee80211_recalc_ps(local, -1); @@ -345,7 +358,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) return 0; err_del_interface: - drv_remove_interface(local, &sdata->vif); + drv_remove_interface(local, sdata); err_stop: if (!local->open_count) drv_stop(local); @@ -450,27 +463,29 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct ieee80211_sub_if_data *vlan, *tmpsdata; struct beacon_data *old_beacon = rtnl_dereference(sdata->u.ap.beacon); + struct sk_buff *old_probe_resp = + rtnl_dereference(sdata->u.ap.probe_resp); /* sdata_running will return false, so this will disable */ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - /* remove beacon */ - rcu_assign_pointer(sdata->u.ap.beacon, NULL); + /* remove beacon and probe response */ + RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); + RCU_INIT_POINTER(sdata->u.ap.probe_resp, NULL); synchronize_rcu(); kfree(old_beacon); - - /* free all potentially still buffered bcast frames */ - while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { - local->total_ps_buffered--; - dev_kfree_skb(skb); - } + kfree_skb(old_probe_resp); /* down all dependent devices, that is VLANs */ list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, u.vlan.list) dev_close(vlan->dev); WARN_ON(!list_empty(&sdata->u.ap.vlans)); + + /* free all potentially still buffered bcast frames */ + local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps_bc_buf); + skb_queue_purge(&sdata->u.ap.ps_bc_buf); } if (going_down) @@ -522,7 +537,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_free_keys(sdata); if (going_down) - drv_remove_interface(local, &sdata->vif); + drv_remove_interface(local, sdata); } sdata->bss = NULL; @@ -645,7 +660,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_teardown_sdata, .ndo_start_xmit = ieee80211_subif_start_xmit, - .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, @@ -658,7 +673,6 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; struct ieee80211_radiotap_header *rtap = (void *)skb->data; - u8 *p; if (local->hw.queues < 4) return 0; @@ -669,19 +683,7 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, hdr = (void *)((u8 *)skb->data + le16_to_cpu(rtap->it_len)); - if (!ieee80211_is_data(hdr->frame_control)) { - skb->priority = 7; - return ieee802_1d_to_ac[skb->priority]; - } - if (!ieee80211_is_data_qos(hdr->frame_control)) { - skb->priority = 0; - return ieee802_1d_to_ac[skb->priority]; - } - - p = ieee80211_get_qos_ctl(hdr); - skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK; - - return ieee80211_downgrade_queue(local, skb); + return ieee80211_select_queue_80211(local, skb, hdr); } static const struct net_device_ops ieee80211_monitorif_ops = { @@ -689,7 +691,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_teardown_sdata, .ndo_start_xmit = ieee80211_monitor_start_xmit, - .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_select_queue = ieee80211_monitor_select_queue, @@ -852,6 +854,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE); sdata->control_port_no_encrypt = false; + sdata->noack_map = 0; + /* only monitor differs */ sdata->dev->type = ARPHRD_ETHER; @@ -1214,6 +1218,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_path_flush_by_iface(sdata); + synchronize_rcu(); unregister_netdevice(sdata->dev); } @@ -1233,6 +1240,9 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_path_flush_by_iface(sdata); + unregister_netdevice_queue(sdata->dev, &unreg_list); } mutex_unlock(&local->iflist_mtx); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 5150c6d11b57..87a89741432d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -15,6 +15,7 @@ #include <linux/rcupdate.h> #include <linux/rtnetlink.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -133,9 +134,13 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))) + (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || + (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) sdata->crypto_tx_tailroom_needed_cnt--; + WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && + (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)); + return 0; } @@ -178,7 +183,8 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sdata = key->sdata; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))) + (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || + (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) increment_tailroom_need_count(sdata); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -464,7 +470,7 @@ int ieee80211_key_link(struct ieee80211_key *key, * some hardware cannot handle TKIP with QoS, so * we indicate whether QoS could be in use. */ - if (test_sta_flags(sta, WLAN_STA_WME)) + if (test_sta_flag(sta, WLAN_STA_WME)) key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; } else { if (sdata->vif.type == NL80211_IFTYPE_STATION) { @@ -478,7 +484,7 @@ int ieee80211_key_link(struct ieee80211_key *key, /* same here, the AP could be using QoS */ ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid); if (ap) { - if (test_sta_flags(ap, WLAN_STA_WME)) + if (test_sta_flag(ap, WLAN_STA_WME)) key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; } diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 14590332c81c..1bf7903496f8 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -9,6 +9,7 @@ /* just for IFNAMSIZ */ #include <linux/if.h> #include <linux/slab.h> +#include <linux/export.h> #include "led.h" void ieee80211_led_rx(struct ieee80211_local *local) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index acb44230b251..0a0d94ad9b08 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -19,7 +19,7 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/bitmap.h> -#include <linux/pm_qos_params.h> +#include <linux/pm_qos.h> #include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> @@ -47,7 +47,7 @@ void ieee80211_configure_filter(struct ieee80211_local *local) if (atomic_read(&local->iff_allmultis)) new_flags |= FIF_ALLMULTI; - if (local->monitors || local->scanning) + if (local->monitors || test_bit(SCAN_SW_SCANNING, &local->scanning)) new_flags |= FIF_BCN_PRBRESP_PROMISC; if (local->fif_probe_req || local->probe_req_reg) @@ -92,50 +92,9 @@ static void ieee80211_reconfig_filter(struct work_struct *work) ieee80211_configure_filter(local); } -/* - * Returns true if we are logically configured to be on - * the operating channel AND the hardware-conf is currently - * configured on the operating channel. Compares channel-type - * as well. - */ -bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local) -{ - struct ieee80211_channel *chan, *scan_chan; - enum nl80211_channel_type channel_type; - - /* This logic needs to match logic in ieee80211_hw_config */ - if (local->scan_channel) { - chan = local->scan_channel; - /* If scanning on oper channel, use whatever channel-type - * is currently in use. - */ - if (chan == local->oper_channel) - channel_type = local->_oper_channel_type; - else - channel_type = NL80211_CHAN_NO_HT; - } else if (local->tmp_channel) { - chan = scan_chan = local->tmp_channel; - channel_type = local->tmp_channel_type; - } else { - chan = local->oper_channel; - channel_type = local->_oper_channel_type; - } - - if (chan != local->oper_channel || - channel_type != local->_oper_channel_type) - return false; - - /* Check current hardware-config against oper_channel. */ - if ((local->oper_channel != local->hw.conf.channel) || - (local->_oper_channel_type != local->hw.conf.channel_type)) - return false; - - return true; -} - int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { - struct ieee80211_channel *chan, *scan_chan; + struct ieee80211_channel *chan; int ret = 0; int power; enum nl80211_channel_type channel_type; @@ -143,14 +102,12 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) might_sleep(); - scan_chan = local->scan_channel; - /* If this off-channel logic ever changes, ieee80211_on_oper_channel * may need to change as well. */ offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; - if (scan_chan) { - chan = scan_chan; + if (local->scan_channel) { + chan = local->scan_channel; /* If scanning on oper channel, use whatever channel-type * is currently in use. */ @@ -159,7 +116,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) else channel_type = NL80211_CHAN_NO_HT; } else if (local->tmp_channel) { - chan = scan_chan = local->tmp_channel; + chan = local->tmp_channel; channel_type = local->tmp_channel_type; } else { chan = local->oper_channel; @@ -193,8 +150,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) changed |= IEEE80211_CONF_CHANGE_SMPS; } - if ((local->scanning & SCAN_SW_SCANNING) || - (local->scanning & SCAN_HW_SCANNING)) + if (test_bit(SCAN_SW_SCANNING, &local->scanning) || + test_bit(SCAN_HW_SCANNING, &local->scanning)) power = chan->max_power; else power = local->power_constr_level ? @@ -325,6 +282,8 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; + struct sta_info *sta, *tmp; + struct skb_eosp_msg_data *eosp_data; struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -340,6 +299,18 @@ static void ieee80211_tasklet_handler(unsigned long data) skb->pkt_type = 0; ieee80211_tx_status(local_to_hw(local), skb); break; + case IEEE80211_EOSP_MSG: + eosp_data = (void *)skb->cb; + for_each_sta_info(local, eosp_data->sta, sta, tmp) { + /* skip wrong virtual interface */ + if (memcmp(eosp_data->iface, + sta->sdata->vif.addr, ETH_ALEN)) + continue; + clear_sta_flag(sta, WLAN_STA_SP); + break; + } + dev_kfree_skb(skb); + break; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); @@ -422,9 +393,6 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, sdata = IEEE80211_DEV_TO_SUB_IF(ndev); bss_conf = &sdata->vif.bss_conf; - if (!ieee80211_sdata_running(sdata)) - return NOTIFY_DONE; - /* ARP filtering is only supported in managed mode */ if (sdata->vif.type != NL80211_IFTYPE_STATION) return NOTIFY_DONE; @@ -453,7 +421,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, } bss_conf->arp_addr_cnt = c; - /* Configure driver only if associated */ + /* Configure driver only if associated (which also implies it is up) */ if (ifmgd->associated) { bss_conf->arp_filter_enabled = sdata->arp_filter_state; ieee80211_bss_info_change_notify(sdata, @@ -546,6 +514,19 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { }, }; +static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { + .ampdu_params_info = IEEE80211_HT_AMPDU_PARM_FACTOR | + IEEE80211_HT_AMPDU_PARM_DENSITY, + + .cap_info = cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_MAX_AMSDU | + IEEE80211_HT_CAP_SGI_40), + .mcs = { + .rx_mask = { 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, }, + }, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -581,7 +562,13 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->flags |= WIPHY_FLAG_NETNS_OK | WIPHY_FLAG_4ADDR_AP | - WIPHY_FLAG_4ADDR_STATION; + WIPHY_FLAG_4ADDR_STATION | + WIPHY_FLAG_REPORTS_OBSS | + WIPHY_FLAG_OFFCHAN_TX | + WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; + + wiphy->features = NL80211_FEATURE_SK_TX_STATUS | + NL80211_FEATURE_HT_IBSS; if (!ops->set_key) wiphy->flags |= WIPHY_FLAG_IBSS_RSN; @@ -594,7 +581,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); - BUG_ON(!ops->tx); + BUG_ON(!ops->tx && !ops->tx_frags); BUG_ON(!ops->start); BUG_ON(!ops->stop); BUG_ON(!ops->config); @@ -608,11 +595,13 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.max_rates = 1; local->hw.max_report_rates = 0; local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF; + local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; local->user_power_level = -1; local->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; local->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; + wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; INIT_LIST_HEAD(&local->interfaces); @@ -655,6 +644,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_WORK(&local->sched_scan_stopped_work, ieee80211_sched_scan_stopped_work); + spin_lock_init(&local->ack_status_lock); + idr_init(&local->ack_status_frames); + /* preallocate at least one entry */ + idr_pre_get(&local->ack_status_frames, GFP_KERNEL); + sta_info_init(local); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { @@ -742,6 +736,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (!local->int_scan_req) return -ENOMEM; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!local->hw.wiphy->bands[band]) + continue; + local->int_scan_req->rates[band] = (u32) -1; + } + /* if low-level driver supports AP, we also support VLAN */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) { hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); @@ -862,6 +862,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->ops->sched_scan_start) local->hw.wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; + /* mac80211 based drivers don't support internal TDLS setup */ + if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) + local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; @@ -885,12 +889,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) * and we need some headroom for passing the frame to monitor * interfaces, but never both at the same time. */ -#ifndef __CHECKER__ - BUILD_BUG_ON(IEEE80211_TX_STATUS_HEADROOM != - sizeof(struct ieee80211_tx_status_rtap_hdr)); -#endif local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, - sizeof(struct ieee80211_tx_status_rtap_hdr)); + IEEE80211_TX_STATUS_HEADROOM); debugfs_hw_add(local); @@ -1030,6 +1030,13 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_unregister_hw); +static int ieee80211_free_ack_frame(int id, void *p, void *data) +{ + WARN_ONCE(1, "Have pending ack frames!\n"); + kfree_skb(p); + return 0; +} + void ieee80211_free_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); @@ -1040,6 +1047,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) if (local->wiphy_ciphers_allocated) kfree(local->hw.wiphy->cipher_suites); + idr_for_each(&local->ack_status_frames, + ieee80211_free_ack_frame, NULL); + idr_destroy(&local->ack_status_frames); + wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 29e9980c8e60..c707c8bf6d2c 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -13,10 +13,6 @@ #include "ieee80211_i.h" #include "mesh.h" -#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) -#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) -#define IEEE80211_MESH_RANN_INTERVAL (1 * HZ) - #define MESHCONF_CAPAB_ACCEPT_PLINKS 0x01 #define MESHCONF_CAPAB_FORWARDING 0x08 @@ -27,6 +23,17 @@ int mesh_allocated; static struct kmem_cache *rm_cache; +#ifdef CONFIG_MAC80211_MESH +bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) +{ + return (mgmt->u.action.u.mesh_action.action_code == + WLAN_MESH_ACTION_HWMP_PATH_SELECTION); +} +#else +bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) +{ return false; } +#endif + void ieee80211s_init(void) { mesh_pathtbl_init(); @@ -69,6 +76,7 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data) bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_local *local = sdata->local; /* * As support for each feature is added, check for matching @@ -80,15 +88,23 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat * - MDA enabled * - Power management control on fc */ - if (ifmsh->mesh_id_len == ie->mesh_id_len && - memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && - (ifmsh->mesh_pp_id == ie->mesh_config->meshconf_psel) && - (ifmsh->mesh_pm_id == ie->mesh_config->meshconf_pmetric) && - (ifmsh->mesh_cc_id == ie->mesh_config->meshconf_congest) && - (ifmsh->mesh_sp_id == ie->mesh_config->meshconf_synch) && - (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth)) - return true; - + if (!(ifmsh->mesh_id_len == ie->mesh_id_len && + memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && + (ifmsh->mesh_pp_id == ie->mesh_config->meshconf_psel) && + (ifmsh->mesh_pm_id == ie->mesh_config->meshconf_pmetric) && + (ifmsh->mesh_cc_id == ie->mesh_config->meshconf_congest) && + (ifmsh->mesh_sp_id == ie->mesh_config->meshconf_synch) && + (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth))) + goto mismatch; + + /* disallow peering with mismatched channel types for now */ + if (ie->ht_info_elem && + (local->_oper_channel_type != + ieee80211_ht_info_to_channel_type(ie->ht_info_elem))) + goto mismatch; + + return true; +mismatch: return false; } @@ -193,10 +209,9 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, } p = kmem_cache_alloc(rm_cache, GFP_ATOMIC); - if (!p) { - printk(KERN_DEBUG "o11s: could not allocate RMC entry\n"); + if (!p) return 0; - } + p->seqnum = seqnum; p->exp_time = jiffies + RMC_TIMEOUT; memcpy(p->sa, sa, ETH_ALEN); @@ -204,36 +219,127 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, return 0; } -void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int +mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 *pos, neighbors; + u8 meshconf_len = sizeof(struct ieee80211_meshconf_ie); + + if (skb_tailroom(skb) < 2 + meshconf_len) + return -ENOMEM; + + pos = skb_put(skb, 2 + meshconf_len); + *pos++ = WLAN_EID_MESH_CONFIG; + *pos++ = meshconf_len; + + /* Active path selection protocol ID */ + *pos++ = ifmsh->mesh_pp_id; + /* Active path selection metric ID */ + *pos++ = ifmsh->mesh_pm_id; + /* Congestion control mode identifier */ + *pos++ = ifmsh->mesh_cc_id; + /* Synchronization protocol identifier */ + *pos++ = ifmsh->mesh_sp_id; + /* Authentication Protocol identifier */ + *pos++ = ifmsh->mesh_auth_id; + /* Mesh Formation Info - number of neighbors */ + neighbors = atomic_read(&ifmsh->mshstats.estab_plinks); + /* Number of neighbor mesh STAs or 15 whichever is smaller */ + neighbors = (neighbors > 15) ? 15 : neighbors; + *pos++ = neighbors << 1; + /* Mesh capability */ + ifmsh->accepting_plinks = mesh_plink_availables(sdata); + *pos = MESHCONF_CAPAB_FORWARDING; + *pos++ |= ifmsh->accepting_plinks ? + MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; + *pos++ = 0x00; + + return 0; +} + +int +mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u8 *pos; - int len, i, rate; - u8 neighbors; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - len = sband->n_bitrates; - if (len > 8) - len = 8; - pos = skb_put(skb, len + 2); - *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = len; - for (i = 0; i < len; i++) { - rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); + if (skb_tailroom(skb) < 2 + ifmsh->mesh_id_len) + return -ENOMEM; + + pos = skb_put(skb, 2 + ifmsh->mesh_id_len); + *pos++ = WLAN_EID_MESH_ID; + *pos++ = ifmsh->mesh_id_len; + if (ifmsh->mesh_id_len) + memcpy(pos, ifmsh->mesh_id, ifmsh->mesh_id_len); + + return 0; +} + +int +mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 offset, len; + const u8 *data; + + if (!ifmsh->ie || !ifmsh->ie_len) + return 0; + + /* fast-forward to vendor IEs */ + offset = ieee80211_ie_split_vendor(ifmsh->ie, ifmsh->ie_len, 0); + + if (offset) { + len = ifmsh->ie_len - offset; + data = ifmsh->ie + offset; + if (skb_tailroom(skb) < len) + return -ENOMEM; + memcpy(skb_put(skb, len), data, len); } - if (sband->n_bitrates > len) { - pos = skb_put(skb, sband->n_bitrates - len + 2); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = sband->n_bitrates - len; - for (i = len; i < sband->n_bitrates; i++) { - rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); + return 0; +} + +int +mesh_add_rsn_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 len = 0; + const u8 *data; + + if (!ifmsh->ie || !ifmsh->ie_len) + return 0; + + /* find RSN IE */ + data = ifmsh->ie; + while (data < ifmsh->ie + ifmsh->ie_len) { + if (*data == WLAN_EID_RSN) { + len = data[1] + 2; + break; } + data++; } + if (len) { + if (skb_tailroom(skb) < len) + return -ENOMEM; + memcpy(skb_put(skb, len), data, len); + } + + return 0; +} + +int mesh_add_ds_params_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + u8 *pos; + + if (skb_tailroom(skb) < 3) + return -ENOMEM; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; if (sband->band == IEEE80211_BAND_2GHZ) { pos = skb_put(skb, 2 + 1); *pos++ = WLAN_EID_DS_PARAMS; @@ -241,53 +347,52 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) *pos++ = ieee80211_frequency_to_channel(local->hw.conf.channel->center_freq); } - pos = skb_put(skb, 2 + sdata->u.mesh.mesh_id_len); - *pos++ = WLAN_EID_MESH_ID; - *pos++ = sdata->u.mesh.mesh_id_len; - if (sdata->u.mesh.mesh_id_len) - memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len); - - pos = skb_put(skb, 2 + sizeof(struct ieee80211_meshconf_ie)); - *pos++ = WLAN_EID_MESH_CONFIG; - *pos++ = sizeof(struct ieee80211_meshconf_ie); + return 0; +} - /* Active path selection protocol ID */ - *pos++ = sdata->u.mesh.mesh_pp_id; +int mesh_add_ht_cap_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + u8 *pos; - /* Active path selection metric ID */ - *pos++ = sdata->u.mesh.mesh_pm_id; + sband = local->hw.wiphy->bands[local->oper_channel->band]; + if (!sband->ht_cap.ht_supported || + local->_oper_channel_type == NL80211_CHAN_NO_HT) + return 0; - /* Congestion control mode identifier */ - *pos++ = sdata->u.mesh.mesh_cc_id; + if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap)) + return -ENOMEM; - /* Synchronization protocol identifier */ - *pos++ = sdata->u.mesh.mesh_sp_id; + pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_cap)); + ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, sband->ht_cap.cap); - /* Authentication Protocol identifier */ - *pos++ = sdata->u.mesh.mesh_auth_id; + return 0; +} - /* Mesh Formation Info - number of neighbors */ - neighbors = atomic_read(&sdata->u.mesh.mshstats.estab_plinks); - /* Number of neighbor mesh STAs or 15 whichever is smaller */ - neighbors = (neighbors > 15) ? 15 : neighbors; - *pos++ = neighbors << 1; +int mesh_add_ht_info_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_channel *channel = local->oper_channel; + enum nl80211_channel_type channel_type = local->_oper_channel_type; + struct ieee80211_supported_band *sband = + local->hw.wiphy->bands[channel->band]; + struct ieee80211_sta_ht_cap *ht_cap = &sband->ht_cap; + u8 *pos; - /* Mesh capability */ - sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata); - *pos = MESHCONF_CAPAB_FORWARDING; - *pos++ |= sdata->u.mesh.accepting_plinks ? - MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; - *pos++ = 0x00; + if (!ht_cap->ht_supported || channel_type == NL80211_CHAN_NO_HT) + return 0; - if (sdata->u.mesh.ie) { - int len = sdata->u.mesh.ie_len; - const u8 *data = sdata->u.mesh.ie; - if (skb_tailroom(skb) > len) - memcpy(skb_put(skb, len), data, len); - } -} + if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_info)) + return -ENOMEM; + pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_info)); + ieee80211_ie_build_ht_info(pos, ht_cap, channel, channel_type); + return 0; +} static void ieee80211_mesh_path_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = @@ -352,8 +457,7 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, memcpy(hdr->addr3, meshsa, ETH_ALEN); return 24; } else { - *fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | - IEEE80211_FCTL_TODS); + *fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memset(hdr->addr1, 0, ETH_ALEN); /* RA is resolved later */ memcpy(hdr->addr2, meshsa, ETH_ALEN); @@ -425,7 +529,8 @@ static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata) mesh_path_tx_root_frame(sdata); mod_timer(&ifmsh->mesh_path_root_timer, - round_jiffies(jiffies + IEEE80211_MESH_RANN_INTERVAL)); + round_jiffies(TU_TO_EXP_TIME( + ifmsh->mshcfg.dot11MeshHWMPRannInterval))); } #ifdef CONFIG_PM @@ -433,7 +538,7 @@ void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - /* use atomic bitops in case both timers fire at the same time */ + /* use atomic bitops in case all timers fire at the same time */ if (del_timer_sync(&ifmsh->housekeeping_timer)) set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); @@ -557,11 +662,18 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { switch (mgmt->u.action.category) { - case WLAN_CATEGORY_MESH_ACTION: - mesh_rx_plink_frame(sdata, mgmt, len, rx_status); + case WLAN_CATEGORY_SELF_PROTECTED: + switch (mgmt->u.action.u.self_prot.action_code) { + case WLAN_SP_MESH_PEERING_OPEN: + case WLAN_SP_MESH_PEERING_CLOSE: + case WLAN_SP_MESH_PEERING_CONFIRM: + mesh_rx_plink_frame(sdata, mgmt, len, rx_status); + break; + } break; - case WLAN_CATEGORY_MESH_PATH_SEL: - mesh_rx_path_sel_frame(sdata, mgmt, len); + case WLAN_CATEGORY_MESH_ACTION: + if (mesh_action_is_path_sel(mgmt)) + mesh_rx_path_sel_frame(sdata, mgmt, len); break; } } @@ -633,9 +745,11 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ifmsh->accepting_plinks = true; ifmsh->preq_id = 0; ifmsh->sn = 0; + ifmsh->num_gates = 0; atomic_set(&ifmsh->mpaths, 0); mesh_rmc_init(sdata); ifmsh->last_preq = jiffies; + ifmsh->next_perr = jiffies; /* Allocate all mesh structures when creating the first mesh interface. */ if (!mesh_allocated) ieee80211s_init(); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 249e733362e7..bd14bd26a2b6 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -31,6 +31,8 @@ * @MESH_PATH_FIXED: the mesh path has been manually set and should not be * modified * @MESH_PATH_RESOLVED: the mesh path can has been resolved + * @MESH_PATH_REQ_QUEUED: there is an unsent path request for this destination + * already queued up, waiting for the discovery process to start. * * MESH_PATH_RESOLVED is used by the mesh path timer to * decide when to stop or cancel the mesh path discovery. @@ -41,6 +43,7 @@ enum mesh_path_flags { MESH_PATH_SN_VALID = BIT(2), MESH_PATH_FIXED = BIT(3), MESH_PATH_RESOLVED = BIT(4), + MESH_PATH_REQ_QUEUED = BIT(5), }; /** @@ -80,7 +83,10 @@ enum mesh_deferred_task_flags { * retry * @discovery_retries: number of discovery retries * @flags: mesh path flags, as specified on &enum mesh_path_flags - * @state_lock: mesh path state lock + * @state_lock: mesh path state lock used to protect changes to the + * mpath itself. No need to take this lock when adding or removing + * an mpath to a hash bucket on a path table. + * @is_gate: the destination station of this path is a mesh gate * * * The combination of dst and sdata is unique in the mesh path table. Since the @@ -104,6 +110,7 @@ struct mesh_path { u8 discovery_retries; enum mesh_path_flags flags; spinlock_t state_lock; + bool is_gate; }; /** @@ -120,6 +127,9 @@ struct mesh_path { * buckets * @mean_chain_len: maximum average length for the hash buckets' list, if it is * reached, the table will grow + * @known_gates: list of known mesh gates and their mpaths by the station. The + * gate's mpath may or may not be resolved and active. + * * rcu_head: RCU head to free the table */ struct mesh_table { @@ -133,6 +143,8 @@ struct mesh_table { int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl); int size_order; int mean_chain_len; + struct hlist_head *known_gates; + spinlock_t gates_lock; struct rcu_head rcu_head; }; @@ -166,6 +178,8 @@ struct mesh_rmc { u32 idx_mask; }; +#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) +#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) #define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units */ @@ -177,14 +191,6 @@ struct mesh_rmc { /* Maximum number of paths per interface */ #define MESH_MAX_MPATHS 1024 -/* Pending ANA approval */ -#define MESH_PATH_SEL_ACTION 0 - -/* PERR reason codes */ -#define PEER_RCODE_UNSPECIFIED 11 -#define PERR_RCODE_NO_ROUTE 12 -#define PERR_RCODE_DEST_UNREACH 13 - /* Public interfaces */ /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, @@ -199,6 +205,20 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, void mesh_ids_set_default(struct ieee80211_if_mesh *mesh); void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); +int mesh_add_meshconf_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_meshid_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_rsn_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_vendor_ies(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_ds_params_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_ht_cap_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_ht_info_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); @@ -213,6 +233,8 @@ void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); /* Mesh paths */ int mesh_nexthop_lookup(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); +int mesh_nexthop_resolve(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata); struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata); @@ -223,10 +245,13 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data *sdata); void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); void mesh_path_expire(struct ieee80211_sub_if_data *sdata); -void mesh_path_flush(struct ieee80211_sub_if_data *sdata); void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); + +int mesh_path_add_gate(struct mesh_path *mpath); +int mesh_path_send_to_gates(struct mesh_path *mpath); +int mesh_gate_num(struct ieee80211_sub_if_data *sdata); /* Mesh plinks */ void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data *sdata, @@ -256,12 +281,14 @@ void mesh_pathtbl_unregister(void); int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata); void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); +void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_path_discard_frame(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); void mesh_path_restart(struct ieee80211_sub_if_data *sdata); void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); +bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); extern int mesh_paths_generation; #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 3460108810d5..73abb7524b2c 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -8,10 +8,12 @@ */ #include <linux/slab.h> +#include "wme.h" #include "mesh.h" #ifdef CONFIG_MAC80211_VERBOSE_MHWMP_DEBUG -#define mhwmp_dbg(fmt, args...) printk(KERN_DEBUG "Mesh HWMP: " fmt, ##args) +#define mhwmp_dbg(fmt, args...) \ + printk(KERN_DEBUG "Mesh HWMP (%s): " fmt "\n", sdata->name, ##args) #else #define mhwmp_dbg(fmt, args...) do { (void)(0); } while (0) #endif @@ -68,12 +70,12 @@ static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) #define PREP_IE_FLAGS(x) PREQ_IE_FLAGS(x) #define PREP_IE_HOPCOUNT(x) PREQ_IE_HOPCOUNT(x) #define PREP_IE_TTL(x) PREQ_IE_TTL(x) -#define PREP_IE_ORIG_ADDR(x) (x + 3) -#define PREP_IE_ORIG_SN(x) u32_field_get(x, 9, 0) +#define PREP_IE_ORIG_ADDR(x) (AE_F_SET(x) ? x + 27 : x + 21) +#define PREP_IE_ORIG_SN(x) u32_field_get(x, 27, AE_F_SET(x)) #define PREP_IE_LIFETIME(x) u32_field_get(x, 13, AE_F_SET(x)) #define PREP_IE_METRIC(x) u32_field_get(x, 17, AE_F_SET(x)) -#define PREP_IE_TARGET_ADDR(x) (AE_F_SET(x) ? x + 27 : x + 21) -#define PREP_IE_TARGET_SN(x) u32_field_get(x, 27, AE_F_SET(x)) +#define PREP_IE_TARGET_ADDR(x) (x + 3) +#define PREP_IE_TARGET_SN(x) u32_field_get(x, 9, 0) #define PERR_IE_TTL(x) (*(x)) #define PERR_IE_TARGET_FLAGS(x) (*(x + 2)) @@ -111,20 +113,20 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); + struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos; - int ie_len; + u8 *pos, ie_len; + int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + + sizeof(mgmt->u.action.u.mesh_action); + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + hdr_len + + 2 + 37); /* max HWMP IE */ if (!skb) return -1; skb_reserve(skb, local->hw.extra_tx_headroom); - /* 25 is the size of the common mgmt part (24) plus the size of the - * common action part (1) - */ - mgmt = (struct ieee80211_mgmt *) - skb_put(skb, 25 + sizeof(mgmt->u.action.u.mesh_action)); - memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.mesh_action)); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); + memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); @@ -132,24 +134,25 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID == SA */ memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); - mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL; - mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; + mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; + mgmt->u.action.u.mesh_action.action_code = + WLAN_MESH_ACTION_HWMP_PATH_SELECTION; switch (action) { case MPATH_PREQ: - mhwmp_dbg("sending PREQ to %pM\n", target); + mhwmp_dbg("sending PREQ to %pM", target); ie_len = 37; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREQ; break; case MPATH_PREP: - mhwmp_dbg("sending PREP to %pM\n", target); + mhwmp_dbg("sending PREP to %pM", target); ie_len = 31; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREP; break; case MPATH_RANN: - mhwmp_dbg("sending RANN from %pM\n", orig_addr); + mhwmp_dbg("sending RANN from %pM", orig_addr); ie_len = sizeof(struct ieee80211_rann_ie); pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_RANN; @@ -163,35 +166,63 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, *pos++ = flags; *pos++ = hop_count; *pos++ = ttl; - if (action == MPATH_PREQ) { - memcpy(pos, &preq_id, 4); + if (action == MPATH_PREP) { + memcpy(pos, target, ETH_ALEN); + pos += ETH_ALEN; + memcpy(pos, &target_sn, 4); pos += 4; - } - memcpy(pos, orig_addr, ETH_ALEN); - pos += ETH_ALEN; - memcpy(pos, &orig_sn, 4); - pos += 4; - if (action != MPATH_RANN) { - memcpy(pos, &lifetime, 4); + } else { + if (action == MPATH_PREQ) { + memcpy(pos, &preq_id, 4); + pos += 4; + } + memcpy(pos, orig_addr, ETH_ALEN); + pos += ETH_ALEN; + memcpy(pos, &orig_sn, 4); pos += 4; } + memcpy(pos, &lifetime, 4); /* interval for RANN */ + pos += 4; memcpy(pos, &metric, 4); pos += 4; if (action == MPATH_PREQ) { - /* destination count */ - *pos++ = 1; + *pos++ = 1; /* destination count */ *pos++ = target_flags; - } - if (action != MPATH_RANN) { memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; memcpy(pos, &target_sn, 4); + pos += 4; + } else if (action == MPATH_PREP) { + memcpy(pos, orig_addr, ETH_ALEN); + pos += ETH_ALEN; + memcpy(pos, &orig_sn, 4); + pos += 4; } ieee80211_tx_skb(sdata, skb); return 0; } + +/* Headroom is not adjusted. Caller should ensure that skb has sufficient + * headroom in case the frame is encrypted. */ +static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, 0); + skb_set_transport_header(skb, 0); + + /* Send all internal mgmt frames on VO. Accordingly set TID to 7. */ + skb_set_queue_mapping(skb, IEEE80211_AC_VO); + skb->priority = 7; + + info->control.vif = &sdata->vif; + ieee80211_set_qos_hdr(sdata, skb); +} + /** * mesh_send_path error - Sends a PERR mesh management frame * @@ -199,34 +230,44 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, * @target_sn: SN of the broken destination * @target_rcode: reason code for this PERR * @ra: node this frame is addressed to + * + * Note: This function may be called with driver locks taken that the driver + * also acquires in the TX path. To avoid a deadlock we don't transmit the + * frame directly but add it to the pending queue instead. */ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, const u8 *ra, struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); + struct sk_buff *skb; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_mgmt *mgmt; - u8 *pos; - int ie_len; + u8 *pos, ie_len; + int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + + sizeof(mgmt->u.action.u.mesh_action); + + if (time_before(jiffies, ifmsh->next_perr)) + return -EAGAIN; + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + hdr_len + + 2 + 15 /* PERR IE */); if (!skb) return -1; - skb_reserve(skb, local->hw.extra_tx_headroom); - /* 25 is the size of the common mgmt part (24) plus the size of the - * common action part (1) - */ - mgmt = (struct ieee80211_mgmt *) - skb_put(skb, 25 + sizeof(mgmt->u.action.u.mesh_action)); - memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.mesh_action)); + skb_reserve(skb, local->tx_headroom + local->hw.extra_tx_headroom); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); + memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); memcpy(mgmt->da, ra, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - /* BSSID is left zeroed, wildcard value */ - mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL; - mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; + /* BSSID == SA */ + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); + mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; + mgmt->u.action.u.mesh_action.action_code = + WLAN_MESH_ACTION_HWMP_PATH_SELECTION; ie_len = 15; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PERR; @@ -251,7 +292,11 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, pos += 4; memcpy(pos, &target_rcode, 2); - ieee80211_tx_skb(sdata, skb); + /* see note in function header */ + prepare_frame_for_deferred_tx(sdata, skb); + ifmsh->next_perr = TU_TO_EXP_TIME( + ifmsh->mshcfg.dot11MeshHWMPperrMinInterval); + ieee80211_add_pending_skb(local, skb); return 0; } @@ -354,15 +399,13 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, orig_metric = PREQ_IE_METRIC(hwmp_ie); break; case MPATH_PREP: - /* Originator here refers to the MP that was the destination in - * the Path Request. The draft refers to that MP as the - * destination address, even though usually it is the origin of - * the PREP frame. We divert from the nomenclature in the draft + /* Originator here refers to the MP that was the target in the + * Path Request. We divert from the nomenclature in the draft * so that we can easily use a single function to gather path * information from both PREQ and PREP frames. */ - orig_addr = PREP_IE_ORIG_ADDR(hwmp_ie); - orig_sn = PREP_IE_ORIG_SN(hwmp_ie); + orig_addr = PREP_IE_TARGET_ADDR(hwmp_ie); + orig_sn = PREP_IE_TARGET_SN(hwmp_ie); orig_lifetime = PREP_IE_LIFETIME(hwmp_ie); orig_metric = PREP_IE_METRIC(hwmp_ie); break; @@ -449,7 +492,6 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (fresh_info) { mesh_path_assign_nexthop(mpath, sta); - mpath->flags &= ~MESH_PATH_SN_VALID; mpath->metric = last_hop_metric; mpath->exp_time = time_after(mpath->exp_time, exp_time) ? mpath->exp_time : exp_time; @@ -484,10 +526,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, orig_sn = PREQ_IE_ORIG_SN(preq_elem); target_flags = PREQ_IE_TARGET_F(preq_elem); - mhwmp_dbg("received PREQ from %pM\n", orig_addr); + mhwmp_dbg("received PREQ from %pM", orig_addr); if (memcmp(target_addr, sdata->vif.addr, ETH_ALEN) == 0) { - mhwmp_dbg("PREQ is for us\n"); + mhwmp_dbg("PREQ is for us"); forward = false; reply = true; metric = 0; @@ -523,10 +565,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, lifetime = PREQ_IE_LIFETIME(preq_elem); ttl = ifmsh->mshcfg.element_ttl; if (ttl != 0) { - mhwmp_dbg("replying to the PREQ\n"); - mesh_path_sel_frame_tx(MPATH_PREP, 0, target_addr, - cpu_to_le32(target_sn), 0, orig_addr, - cpu_to_le32(orig_sn), mgmt->sa, 0, ttl, + mhwmp_dbg("replying to the PREQ"); + mesh_path_sel_frame_tx(MPATH_PREP, 0, orig_addr, + cpu_to_le32(orig_sn), 0, target_addr, + cpu_to_le32(target_sn), mgmt->sa, 0, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), 0, sdata); } else @@ -543,7 +585,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, ifmsh->mshstats.dropped_frames_ttl++; return; } - mhwmp_dbg("forwarding the PREQ from %pM\n", orig_addr); + mhwmp_dbg("forwarding the PREQ from %pM", orig_addr); --ttl; flags = PREQ_IE_FLAGS(preq_elem); preq_id = PREQ_IE_PREQ_ID(preq_elem); @@ -578,16 +620,10 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, u8 next_hop[ETH_ALEN]; u32 target_sn, orig_sn, lifetime; - mhwmp_dbg("received PREP from %pM\n", PREP_IE_ORIG_ADDR(prep_elem)); + mhwmp_dbg("received PREP from %pM", PREP_IE_ORIG_ADDR(prep_elem)); - /* Note that we divert from the draft nomenclature and denominate - * destination to what the draft refers to as origininator. So in this - * function destnation refers to the final destination of the PREP, - * which corresponds with the originator of the PREQ which this PREP - * replies - */ - target_addr = PREP_IE_TARGET_ADDR(prep_elem); - if (memcmp(target_addr, sdata->vif.addr, ETH_ALEN) == 0) + orig_addr = PREP_IE_ORIG_ADDR(prep_elem); + if (memcmp(orig_addr, sdata->vif.addr, ETH_ALEN) == 0) /* destination, no forwarding required */ return; @@ -598,7 +634,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, } rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + mpath = mesh_path_lookup(orig_addr, sdata); if (mpath) spin_lock_bh(&mpath->state_lock); else @@ -613,7 +649,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, flags = PREP_IE_FLAGS(prep_elem); lifetime = PREP_IE_LIFETIME(prep_elem); hopcount = PREP_IE_HOPCOUNT(prep_elem) + 1; - orig_addr = PREP_IE_ORIG_ADDR(prep_elem); + target_addr = PREP_IE_TARGET_ADDR(prep_elem); target_sn = PREP_IE_TARGET_SN(prep_elem); orig_sn = PREP_IE_ORIG_SN(prep_elem); @@ -684,6 +720,8 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, u8 ttl, flags, hopcount; u8 *orig_addr; u32 orig_sn, metric; + u32 interval = ifmsh->mshcfg.dot11MeshHWMPRannInterval; + bool root_is_gate; ttl = rann->rann_ttl; if (ttl <= 1) { @@ -692,12 +730,19 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } ttl--; flags = rann->rann_flags; + root_is_gate = !!(flags & RANN_FLAG_IS_GATE); orig_addr = rann->rann_addr; orig_sn = rann->rann_seq; hopcount = rann->rann_hopcount; hopcount++; metric = rann->rann_metric; - mhwmp_dbg("received RANN from %pM\n", orig_addr); + + /* Ignore our own RANNs */ + if (memcmp(orig_addr, sdata->vif.addr, ETH_ALEN) == 0) + return; + + mhwmp_dbg("received RANN from %pM (is_gate=%d)", orig_addr, + root_is_gate); rcu_read_lock(); mpath = mesh_path_lookup(orig_addr, sdata); @@ -709,18 +754,28 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.mshstats.dropped_frames_no_route++; return; } - mesh_queue_preq(mpath, - PREQ_Q_F_START | PREQ_Q_F_REFRESH); } + + if ((!(mpath->flags & (MESH_PATH_ACTIVE | MESH_PATH_RESOLVING)) || + time_after(jiffies, mpath->exp_time - 1*HZ)) && + !(mpath->flags & MESH_PATH_FIXED)) { + mhwmp_dbg("%s time to refresh root mpath %pM", sdata->name, + orig_addr); + mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); + } + if (mpath->sn < orig_sn) { mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, cpu_to_le32(orig_sn), 0, NULL, 0, broadcast_addr, - hopcount, ttl, 0, + hopcount, ttl, cpu_to_le32(interval), cpu_to_le32(metric + mpath->metric), 0, sdata); mpath->sn = orig_sn; } + if (root_is_gate) + mesh_path_add_gate(mpath); + rcu_read_unlock(); } @@ -732,11 +787,20 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems elems; size_t baselen; u32 last_hop_metric; + struct sta_info *sta; /* need action_code */ if (len < IEEE80211_MIN_ACTION_SIZE + 1) return; + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->sa); + if (!sta || sta->plink_state != NL80211_PLINK_ESTAB) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, len - baselen, &elems); @@ -788,25 +852,36 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_ATOMIC); if (!preq_node) { - mhwmp_dbg("could not allocate PREQ node\n"); + mhwmp_dbg("could not allocate PREQ node"); return; } - spin_lock(&ifmsh->mesh_preq_queue_lock); + spin_lock_bh(&ifmsh->mesh_preq_queue_lock); if (ifmsh->preq_queue_len == MAX_PREQ_QUEUE_LEN) { - spin_unlock(&ifmsh->mesh_preq_queue_lock); + spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); kfree(preq_node); if (printk_ratelimit()) - mhwmp_dbg("PREQ node queue full\n"); + mhwmp_dbg("PREQ node queue full"); + return; + } + + spin_lock(&mpath->state_lock); + if (mpath->flags & MESH_PATH_REQ_QUEUED) { + spin_unlock(&mpath->state_lock); + spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); + kfree(preq_node); return; } memcpy(preq_node->dst, mpath->dst, ETH_ALEN); preq_node->flags = flags; + mpath->flags |= MESH_PATH_REQ_QUEUED; + spin_unlock(&mpath->state_lock); + list_add_tail(&preq_node->list, &ifmsh->preq_queue.list); ++ifmsh->preq_queue_len; - spin_unlock(&ifmsh->mesh_preq_queue_lock); + spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) ieee80211_queue_work(&sdata->local->hw, &sdata->work); @@ -855,6 +930,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) goto enddiscovery; spin_lock_bh(&mpath->state_lock); + mpath->flags &= ~MESH_PATH_REQ_QUEUED; if (preq_node->flags & PREQ_Q_F_START) { if (mpath->flags & MESH_PATH_RESOLVING) { spin_unlock_bh(&mpath->state_lock); @@ -906,71 +982,97 @@ enddiscovery: kfree(preq_node); } -/** - * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame +/* mesh_nexthop_resolve - lookup next hop for given skb and start path + * discovery if no forwarding information is found. * * @skb: 802.11 frame to be sent * @sdata: network subif the frame will be sent through * - * Returns: 0 if the next hop was found. Nonzero otherwise. If no next hop is - * found, the function will start a path discovery and queue the frame so it is - * sent when the path is resolved. This means the caller must not free the skb - * in this case. + * Returns: 0 if the next hop was found and -ENOENT if the frame was queued. + * skb is freeed here if no mpath could be allocated. */ -int mesh_nexthop_lookup(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata) +int mesh_nexthop_resolve(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) { - struct sk_buff *skb_to_free = NULL; - struct mesh_path *mpath; - struct sta_info *next_hop; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct mesh_path *mpath; + struct sk_buff *skb_to_free = NULL; u8 *target_addr = hdr->addr3; int err = 0; rcu_read_lock(); - mpath = mesh_path_lookup(target_addr, sdata); + err = mesh_nexthop_lookup(skb, sdata); + if (!err) + goto endlookup; + /* no nexthop found, start resolving */ + mpath = mesh_path_lookup(target_addr, sdata); if (!mpath) { mesh_path_add(target_addr, sdata); mpath = mesh_path_lookup(target_addr, sdata); if (!mpath) { - sdata->u.mesh.mshstats.dropped_frames_no_route++; + mesh_path_discard_frame(skb, sdata); err = -ENOSPC; goto endlookup; } } - if (mpath->flags & MESH_PATH_ACTIVE) { - if (time_after(jiffies, - mpath->exp_time - - msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time)) && - !memcmp(sdata->vif.addr, hdr->addr4, ETH_ALEN) && - !(mpath->flags & MESH_PATH_RESOLVING) && - !(mpath->flags & MESH_PATH_FIXED)) { - mesh_queue_preq(mpath, - PREQ_Q_F_START | PREQ_Q_F_REFRESH); - } - next_hop = rcu_dereference(mpath->next_hop); - if (next_hop) - memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN); - else - err = -ENOENT; - } else { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - if (!(mpath->flags & MESH_PATH_RESOLVING)) { - /* Start discovery only if it is not running yet */ - mesh_queue_preq(mpath, PREQ_Q_F_START); - } + if (!(mpath->flags & MESH_PATH_RESOLVING)) + mesh_queue_preq(mpath, PREQ_Q_F_START); + + if (skb_queue_len(&mpath->frame_queue) >= MESH_FRAME_QUEUE_LEN) + skb_to_free = skb_dequeue(&mpath->frame_queue); + + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + ieee80211_set_qos_hdr(sdata, skb); + skb_queue_tail(&mpath->frame_queue, skb); + err = -ENOENT; + if (skb_to_free) + mesh_path_discard_frame(skb_to_free, sdata); - if (skb_queue_len(&mpath->frame_queue) >= - MESH_FRAME_QUEUE_LEN) - skb_to_free = skb_dequeue(&mpath->frame_queue); +endlookup: + rcu_read_unlock(); + return err; +} +/** + * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame. Calling + * this function is considered "using" the associated mpath, so preempt a path + * refresh if this mpath expires soon. + * + * @skb: 802.11 frame to be sent + * @sdata: network subif the frame will be sent through + * + * Returns: 0 if the next hop was found. Nonzero otherwise. + */ +int mesh_nexthop_lookup(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) +{ + struct mesh_path *mpath; + struct sta_info *next_hop; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u8 *target_addr = hdr->addr3; + int err = -ENOENT; + + rcu_read_lock(); + mpath = mesh_path_lookup(target_addr, sdata); - info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - skb_queue_tail(&mpath->frame_queue, skb); - if (skb_to_free) - mesh_path_discard_frame(skb_to_free, sdata); - err = -ENOENT; + if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE)) + goto endlookup; + + if (time_after(jiffies, + mpath->exp_time - + msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time)) && + !memcmp(sdata->vif.addr, hdr->addr4, ETH_ALEN) && + !(mpath->flags & MESH_PATH_RESOLVING) && + !(mpath->flags & MESH_PATH_FIXED)) + mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); + + next_hop = rcu_dereference(mpath->next_hop); + if (next_hop) { + memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + err = 0; } endlookup: @@ -982,35 +1084,47 @@ void mesh_path_timer(unsigned long data) { struct mesh_path *mpath = (void *) data; struct ieee80211_sub_if_data *sdata = mpath->sdata; + int ret; if (sdata->local->quiescing) return; spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_RESOLVED || - (!(mpath->flags & MESH_PATH_RESOLVING))) + (!(mpath->flags & MESH_PATH_RESOLVING))) { mpath->flags &= ~(MESH_PATH_RESOLVING | MESH_PATH_RESOLVED); - else if (mpath->discovery_retries < max_preq_retries(sdata)) { + spin_unlock_bh(&mpath->state_lock); + } else if (mpath->discovery_retries < max_preq_retries(sdata)) { ++mpath->discovery_retries; mpath->discovery_timeout *= 2; + mpath->flags &= ~MESH_PATH_REQ_QUEUED; + spin_unlock_bh(&mpath->state_lock); mesh_queue_preq(mpath, 0); } else { mpath->flags = 0; mpath->exp_time = jiffies; - mesh_path_flush_pending(mpath); + spin_unlock_bh(&mpath->state_lock); + if (!mpath->is_gate && mesh_gate_num(sdata) > 0) { + ret = mesh_path_send_to_gates(mpath); + if (ret) + mhwmp_dbg("no gate was reachable"); + } else + mesh_path_flush_pending(mpath); } - - spin_unlock_bh(&mpath->state_lock); } void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u32 interval = ifmsh->mshcfg.dot11MeshHWMPRannInterval; + u8 flags; - mesh_path_sel_frame_tx(MPATH_RANN, 0, sdata->vif.addr, + flags = (ifmsh->mshcfg.dot11MeshGateAnnouncementProtocol) + ? RANN_FLAG_IS_GATE : 0; + mesh_path_sel_frame_tx(MPATH_RANN, flags, sdata->vif.addr, cpu_to_le32(++ifmsh->sn), 0, NULL, 0, broadcast_addr, 0, sdata->u.mesh.mshcfg.element_ttl, - 0, 0, 0, sdata); + cpu_to_le32(interval), 0, 0, sdata); } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 068ee6518254..edf167e3b8f3 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -14,9 +14,16 @@ #include <linux/spinlock.h> #include <linux/string.h> #include <net/mac80211.h> +#include "wme.h" #include "ieee80211_i.h" #include "mesh.h" +#ifdef CONFIG_MAC80211_VERBOSE_MPATH_DEBUG +#define mpath_dbg(fmt, args...) printk(KERN_DEBUG fmt, ##args) +#else +#define mpath_dbg(fmt, args...) do { (void)(0); } while (0) +#endif + /* There will be initially 2^INIT_PATHS_SIZE_ORDER buckets */ #define INIT_PATHS_SIZE_ORDER 2 @@ -42,8 +49,10 @@ static struct mesh_table __rcu *mpp_paths; /* Store paths for MPP&MAP */ int mesh_paths_generation; /* This lock will have the grow table function as writer and add / delete nodes - * as readers. When reading the table (i.e. doing lookups) we are well protected - * by RCU + * as readers. RCU provides sufficient protection only when reading the table + * (i.e. doing lookups). Adding or adding or removing nodes requires we take + * the read lock or we risk operating on an old table. The write lock is only + * needed when modifying the number of buckets a table. */ static DEFINE_RWLOCK(pathtbl_resize_lock); @@ -103,6 +112,7 @@ static struct mesh_table *mesh_table_alloc(int size_order) sizeof(newtbl->hash_rnd)); for (i = 0; i <= newtbl->hash_mask; i++) spin_lock_init(&newtbl->hashwlock[i]); + spin_lock_init(&newtbl->gates_lock); return newtbl; } @@ -118,6 +128,7 @@ static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) { struct hlist_head *mesh_hash; struct hlist_node *p, *q; + struct mpath_node *gate; int i; mesh_hash = tbl->hash_buckets; @@ -129,6 +140,17 @@ static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) } spin_unlock_bh(&tbl->hashwlock[i]); } + if (free_leafs) { + spin_lock_bh(&tbl->gates_lock); + hlist_for_each_entry_safe(gate, p, q, + tbl->known_gates, list) { + hlist_del(&gate->list); + kfree(gate); + } + kfree(tbl->known_gates); + spin_unlock_bh(&tbl->gates_lock); + } + __mesh_table_free(tbl); } @@ -146,6 +168,7 @@ static int mesh_table_grow(struct mesh_table *oldtbl, newtbl->free_node = oldtbl->free_node; newtbl->mean_chain_len = oldtbl->mean_chain_len; newtbl->copy_node = oldtbl->copy_node; + newtbl->known_gates = oldtbl->known_gates; atomic_set(&newtbl->entries, atomic_read(&oldtbl->entries)); oldhash = oldtbl->hash_buckets; @@ -198,6 +221,7 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) while ((skb = __skb_dequeue(&mpath->frame_queue)) != NULL) { hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); + memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN); __skb_queue_tail(&tmpq, skb); } @@ -205,62 +229,129 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); } +static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, + struct mesh_path *gate_mpath) +{ + struct ieee80211_hdr *hdr; + struct ieee80211s_hdr *mshdr; + int mesh_hdrlen, hdrlen; + char *next_hop; + + hdr = (struct ieee80211_hdr *) skb->data; + hdrlen = ieee80211_hdrlen(hdr->frame_control); + mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + + if (!(mshdr->flags & MESH_FLAGS_AE)) { + /* size of the fixed part of the mesh header */ + mesh_hdrlen = 6; + + /* make room for the two extended addresses */ + skb_push(skb, 2 * ETH_ALEN); + memmove(skb->data, hdr, hdrlen + mesh_hdrlen); + + hdr = (struct ieee80211_hdr *) skb->data; + + /* we preserve the previous mesh header and only add + * the new addreses */ + mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + mshdr->flags = MESH_FLAGS_AE_A5_A6; + memcpy(mshdr->eaddr1, hdr->addr3, ETH_ALEN); + memcpy(mshdr->eaddr2, hdr->addr4, ETH_ALEN); + } + + /* update next hop */ + hdr = (struct ieee80211_hdr *) skb->data; + rcu_read_lock(); + next_hop = rcu_dereference(gate_mpath->next_hop)->sta.addr; + memcpy(hdr->addr1, next_hop, ETH_ALEN); + rcu_read_unlock(); + memcpy(hdr->addr2, gate_mpath->sdata->vif.addr, ETH_ALEN); + memcpy(hdr->addr3, dst_addr, ETH_ALEN); +} /** - * mesh_path_lookup - look up a path in the mesh path table - * @dst: hardware address (ETH_ALEN length) of destination - * @sdata: local subif * - * Returns: pointer to the mesh path structure, or NULL if not found + * mesh_path_move_to_queue - Move or copy frames from one mpath queue to another * - * Locking: must be called within a read rcu section. + * This function is used to transfer or copy frames from an unresolved mpath to + * a gate mpath. The function also adds the Address Extension field and + * updates the next hop. + * + * If a frame already has an Address Extension field, only the next hop and + * destination addresses are updated. + * + * The gate mpath must be an active mpath with a valid mpath->next_hop. + * + * @mpath: An active mpath the frames will be sent to (i.e. the gate) + * @from_mpath: The failed mpath + * @copy: When true, copy all the frames to the new mpath queue. When false, + * move them. */ -struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, + struct mesh_path *from_mpath, + bool copy) { - struct mesh_path *mpath; - struct hlist_node *n; - struct hlist_head *bucket; - struct mesh_table *tbl; - struct mpath_node *node; + struct sk_buff *skb, *cp_skb = NULL; + struct sk_buff_head gateq, failq; + unsigned long flags; + int num_skbs; - tbl = rcu_dereference(mesh_paths); + BUG_ON(gate_mpath == from_mpath); + BUG_ON(!gate_mpath->next_hop); - bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; - hlist_for_each_entry_rcu(node, n, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - memcmp(dst, mpath->dst, ETH_ALEN) == 0) { - if (MPATH_EXPIRED(mpath)) { - spin_lock_bh(&mpath->state_lock); - if (MPATH_EXPIRED(mpath)) - mpath->flags &= ~MESH_PATH_ACTIVE; - spin_unlock_bh(&mpath->state_lock); - } - return mpath; + __skb_queue_head_init(&gateq); + __skb_queue_head_init(&failq); + + spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); + skb_queue_splice_init(&from_mpath->frame_queue, &failq); + spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); + + num_skbs = skb_queue_len(&failq); + + while (num_skbs--) { + skb = __skb_dequeue(&failq); + if (copy) { + cp_skb = skb_copy(skb, GFP_ATOMIC); + if (cp_skb) + __skb_queue_tail(&failq, cp_skb); } + + prepare_for_gate(skb, gate_mpath->dst, gate_mpath); + __skb_queue_tail(&gateq, skb); } - return NULL; + + spin_lock_irqsave(&gate_mpath->frame_queue.lock, flags); + skb_queue_splice(&gateq, &gate_mpath->frame_queue); + mpath_dbg("Mpath queue for gate %pM has %d frames\n", + gate_mpath->dst, + skb_queue_len(&gate_mpath->frame_queue)); + spin_unlock_irqrestore(&gate_mpath->frame_queue.lock, flags); + + if (!copy) + return; + + spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); + skb_queue_splice(&failq, &from_mpath->frame_queue); + spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); } -struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) + +static struct mesh_path *path_lookup(struct mesh_table *tbl, u8 *dst, + struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct hlist_node *n; struct hlist_head *bucket; - struct mesh_table *tbl; struct mpath_node *node; - tbl = rcu_dereference(mpp_paths); - bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; hlist_for_each_entry_rcu(node, n, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && - memcmp(dst, mpath->dst, ETH_ALEN) == 0) { + memcmp(dst, mpath->dst, ETH_ALEN) == 0) { if (MPATH_EXPIRED(mpath)) { spin_lock_bh(&mpath->state_lock); - if (MPATH_EXPIRED(mpath)) - mpath->flags &= ~MESH_PATH_ACTIVE; + mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&mpath->state_lock); } return mpath; @@ -269,6 +360,25 @@ struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) return NULL; } +/** + * mesh_path_lookup - look up a path in the mesh path table + * @dst: hardware address (ETH_ALEN length) of destination + * @sdata: local subif + * + * Returns: pointer to the mesh path structure, or NULL if not found + * + * Locking: must be called within a read rcu section. + */ +struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +{ + return path_lookup(rcu_dereference(mesh_paths), dst, sdata); +} + +struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +{ + return path_lookup(rcu_dereference(mpp_paths), dst, sdata); +} + /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index @@ -293,8 +403,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data if (j++ == idx) { if (MPATH_EXPIRED(node->mpath)) { spin_lock_bh(&node->mpath->state_lock); - if (MPATH_EXPIRED(node->mpath)) - node->mpath->flags &= ~MESH_PATH_ACTIVE; + node->mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&node->mpath->state_lock); } return node->mpath; @@ -304,6 +413,94 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data return NULL; } +static void mesh_gate_node_reclaim(struct rcu_head *rp) +{ + struct mpath_node *node = container_of(rp, struct mpath_node, rcu); + kfree(node); +} + +/** + * mesh_path_add_gate - add the given mpath to a mesh gate to our path table + * @mpath: gate path to add to table + */ +int mesh_path_add_gate(struct mesh_path *mpath) +{ + struct mesh_table *tbl; + struct mpath_node *gate, *new_gate; + struct hlist_node *n; + int err; + + rcu_read_lock(); + tbl = rcu_dereference(mesh_paths); + + hlist_for_each_entry_rcu(gate, n, tbl->known_gates, list) + if (gate->mpath == mpath) { + err = -EEXIST; + goto err_rcu; + } + + new_gate = kzalloc(sizeof(struct mpath_node), GFP_ATOMIC); + if (!new_gate) { + err = -ENOMEM; + goto err_rcu; + } + + mpath->is_gate = true; + mpath->sdata->u.mesh.num_gates++; + new_gate->mpath = mpath; + spin_lock_bh(&tbl->gates_lock); + hlist_add_head_rcu(&new_gate->list, tbl->known_gates); + spin_unlock_bh(&tbl->gates_lock); + rcu_read_unlock(); + mpath_dbg("Mesh path (%s): Recorded new gate: %pM. %d known gates\n", + mpath->sdata->name, mpath->dst, + mpath->sdata->u.mesh.num_gates); + return 0; +err_rcu: + rcu_read_unlock(); + return err; +} + +/** + * mesh_gate_del - remove a mesh gate from the list of known gates + * @tbl: table which holds our list of known gates + * @mpath: gate mpath + * + * Returns: 0 on success + * + * Locking: must be called inside rcu_read_lock() section + */ +static int mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) +{ + struct mpath_node *gate; + struct hlist_node *p, *q; + + hlist_for_each_entry_safe(gate, p, q, tbl->known_gates, list) + if (gate->mpath == mpath) { + spin_lock_bh(&tbl->gates_lock); + hlist_del_rcu(&gate->list); + call_rcu(&gate->rcu, mesh_gate_node_reclaim); + spin_unlock_bh(&tbl->gates_lock); + mpath->sdata->u.mesh.num_gates--; + mpath->is_gate = false; + mpath_dbg("Mesh path (%s): Deleted gate: %pM. " + "%d known gates\n", mpath->sdata->name, + mpath->dst, mpath->sdata->u.mesh.num_gates); + break; + } + + return 0; +} + +/** + * mesh_gate_num - number of gates known to this interface + * @sdata: subif data + */ +int mesh_gate_num(struct ieee80211_sub_if_data *sdata) +{ + return sdata->u.mesh.num_gates; +} + /** * mesh_path_add - allocate and add a new path to the mesh path table * @addr: destination address of the path (ETH_ALEN length) @@ -481,6 +678,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) new_mpath->flags = 0; skb_queue_head_init(&new_mpath->frame_queue); new_node->mpath = new_mpath; + init_timer(&new_mpath->timer); new_mpath->exp_time = jiffies; spin_lock_init(&new_mpath->state_lock); @@ -539,28 +737,53 @@ void mesh_plink_broken(struct sta_info *sta) struct hlist_node *p; struct ieee80211_sub_if_data *sdata = sta->sdata; int i; + __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_DEST_UNREACHABLE); rcu_read_lock(); tbl = rcu_dereference(mesh_paths); for_each_mesh_entry(tbl, p, node, i) { mpath = node->mpath; - spin_lock_bh(&mpath->state_lock); if (rcu_dereference(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && !(mpath->flags & MESH_PATH_FIXED)) { + spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; ++mpath->sn; spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, mpath->dst, cpu_to_le32(mpath->sn), - cpu_to_le16(PERR_RCODE_DEST_UNREACH), - bcast, sdata); - } else - spin_unlock_bh(&mpath->state_lock); + reason, bcast, sdata); + } } rcu_read_unlock(); } +static void mesh_path_node_reclaim(struct rcu_head *rp) +{ + struct mpath_node *node = container_of(rp, struct mpath_node, rcu); + struct ieee80211_sub_if_data *sdata = node->mpath->sdata; + + del_timer_sync(&node->mpath->timer); + atomic_dec(&sdata->u.mesh.mpaths); + kfree(node->mpath); + kfree(node); +} + +/* needs to be called with the corresponding hashwlock taken */ +static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) +{ + struct mesh_path *mpath; + mpath = node->mpath; + spin_lock(&mpath->state_lock); + mpath->flags |= MESH_PATH_RESOLVING; + if (mpath->is_gate) + mesh_gate_del(tbl, mpath); + hlist_del_rcu(&node->list); + call_rcu(&node->rcu, mesh_path_node_reclaim); + spin_unlock(&mpath->state_lock); + atomic_dec(&tbl->entries); +} + /** * mesh_path_flush_by_nexthop - Deletes mesh paths if their next hop matches * @@ -581,42 +804,59 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) int i; rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); + read_lock_bh(&pathtbl_resize_lock); + tbl = resize_dereference_mesh_paths(); for_each_mesh_entry(tbl, p, node, i) { mpath = node->mpath; - if (rcu_dereference(mpath->next_hop) == sta) - mesh_path_del(mpath->dst, mpath->sdata); + if (rcu_dereference(mpath->next_hop) == sta) { + spin_lock_bh(&tbl->hashwlock[i]); + __mesh_path_del(tbl, node); + spin_unlock_bh(&tbl->hashwlock[i]); + } } + read_unlock_bh(&pathtbl_resize_lock); rcu_read_unlock(); } -void mesh_path_flush(struct ieee80211_sub_if_data *sdata) +static void table_flush_by_iface(struct mesh_table *tbl, + struct ieee80211_sub_if_data *sdata) { - struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; struct hlist_node *p; int i; - rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); + WARN_ON(!rcu_read_lock_held()); for_each_mesh_entry(tbl, p, node, i) { mpath = node->mpath; - if (mpath->sdata == sdata) - mesh_path_del(mpath->dst, mpath->sdata); + if (mpath->sdata != sdata) + continue; + spin_lock_bh(&tbl->hashwlock[i]); + __mesh_path_del(tbl, node); + spin_unlock_bh(&tbl->hashwlock[i]); } - rcu_read_unlock(); } -static void mesh_path_node_reclaim(struct rcu_head *rp) +/** + * mesh_path_flush_by_iface - Deletes all mesh paths associated with a given iface + * + * This function deletes both mesh paths as well as mesh portal paths. + * + * @sdata - interface data to match + * + */ +void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { - struct mpath_node *node = container_of(rp, struct mpath_node, rcu); - struct ieee80211_sub_if_data *sdata = node->mpath->sdata; + struct mesh_table *tbl; - del_timer_sync(&node->mpath->timer); - atomic_dec(&sdata->u.mesh.mpaths); - kfree(node->mpath); - kfree(node); + rcu_read_lock(); + read_lock_bh(&pathtbl_resize_lock); + tbl = resize_dereference_mesh_paths(); + table_flush_by_iface(tbl, sdata); + tbl = resize_dereference_mpp_paths(); + table_flush_by_iface(tbl, sdata); + read_unlock_bh(&pathtbl_resize_lock); + rcu_read_unlock(); } /** @@ -647,12 +887,7 @@ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) mpath = node->mpath; if (mpath->sdata == sdata && memcmp(addr, mpath->dst, ETH_ALEN) == 0) { - spin_lock(&mpath->state_lock); - mpath->flags |= MESH_PATH_RESOLVING; - hlist_del_rcu(&node->list); - call_rcu(&node->rcu, mesh_path_node_reclaim); - atomic_dec(&tbl->entries); - spin_unlock(&mpath->state_lock); + __mesh_path_del(tbl, node); goto enddel; } } @@ -681,38 +916,68 @@ void mesh_path_tx_pending(struct mesh_path *mpath) } /** + * mesh_path_send_to_gates - sends pending frames to all known mesh gates + * + * @mpath: mesh path whose queue will be emptied + * + * If there is only one gate, the frames are transferred from the failed mpath + * queue to that gate's queue. If there are more than one gates, the frames + * are copied from each gate to the next. After frames are copied, the + * mpath queues are emptied onto the transmission queue. + */ +int mesh_path_send_to_gates(struct mesh_path *mpath) +{ + struct ieee80211_sub_if_data *sdata = mpath->sdata; + struct hlist_node *n; + struct mesh_table *tbl; + struct mesh_path *from_mpath = mpath; + struct mpath_node *gate = NULL; + bool copy = false; + struct hlist_head *known_gates; + + rcu_read_lock(); + tbl = rcu_dereference(mesh_paths); + known_gates = tbl->known_gates; + rcu_read_unlock(); + + if (!known_gates) + return -EHOSTUNREACH; + + hlist_for_each_entry_rcu(gate, n, known_gates, list) { + if (gate->mpath->sdata != sdata) + continue; + + if (gate->mpath->flags & MESH_PATH_ACTIVE) { + mpath_dbg("Forwarding to %pM\n", gate->mpath->dst); + mesh_path_move_to_queue(gate->mpath, from_mpath, copy); + from_mpath = gate->mpath; + copy = true; + } else { + mpath_dbg("Not forwarding %p\n", gate->mpath); + mpath_dbg("flags %x\n", gate->mpath->flags); + } + } + + hlist_for_each_entry_rcu(gate, n, known_gates, list) + if (gate->mpath->sdata == sdata) { + mpath_dbg("Sending to %pM\n", gate->mpath->dst); + mesh_path_tx_pending(gate->mpath); + } + + return (from_mpath == mpath) ? -EHOSTUNREACH : 0; +} + +/** * mesh_path_discard_frame - discard a frame whose path could not be resolved * * @skb: frame to discard * @sdata: network subif the frame was to be sent through * - * If the frame was being forwarded from another MP, a PERR frame will be sent - * to the precursor. The precursor's address (i.e. the previous hop) was saved - * in addr1 of the frame-to-be-forwarded, and would only be overwritten once - * the destination is successfully resolved. - * * Locking: the function must me called within a rcu_read_lock region */ void mesh_path_discard_frame(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct mesh_path *mpath; - u32 sn = 0; - - if (memcmp(hdr->addr4, sdata->vif.addr, ETH_ALEN) != 0) { - u8 *ra, *da; - - da = hdr->addr3; - ra = hdr->addr1; - mpath = mesh_path_lookup(da, sdata); - if (mpath) - sn = ++mpath->sn; - mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, skb->data, - cpu_to_le32(sn), - cpu_to_le16(PERR_RCODE_NO_ROUTE), ra, sdata); - } - kfree_skb(skb); sdata->u.mesh.mshstats.dropped_frames_no_route++; } @@ -728,8 +993,7 @@ void mesh_path_flush_pending(struct mesh_path *mpath) { struct sk_buff *skb; - while ((skb = skb_dequeue(&mpath->frame_queue)) && - (mpath->flags & MESH_PATH_ACTIVE)) + while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) mesh_path_discard_frame(skb, mpath->sdata); } @@ -790,6 +1054,7 @@ static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) int mesh_pathtbl_init(void) { struct mesh_table *tbl_path, *tbl_mpp; + int ret; tbl_path = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); if (!tbl_path) @@ -797,21 +1062,40 @@ int mesh_pathtbl_init(void) tbl_path->free_node = &mesh_path_node_free; tbl_path->copy_node = &mesh_path_node_copy; tbl_path->mean_chain_len = MEAN_CHAIN_LEN; + tbl_path->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); + if (!tbl_path->known_gates) { + ret = -ENOMEM; + goto free_path; + } + INIT_HLIST_HEAD(tbl_path->known_gates); + tbl_mpp = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); if (!tbl_mpp) { - mesh_table_free(tbl_path, true); - return -ENOMEM; + ret = -ENOMEM; + goto free_path; } tbl_mpp->free_node = &mesh_path_node_free; tbl_mpp->copy_node = &mesh_path_node_copy; tbl_mpp->mean_chain_len = MEAN_CHAIN_LEN; + tbl_mpp->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); + if (!tbl_mpp->known_gates) { + ret = -ENOMEM; + goto free_mpp; + } + INIT_HLIST_HEAD(tbl_mpp->known_gates); /* Need no locking since this is during init */ RCU_INIT_POINTER(mesh_paths, tbl_path); RCU_INIT_POINTER(mpp_paths, tbl_mpp); return 0; + +free_mpp: + mesh_table_free(tbl_mpp, true); +free_path: + mesh_table_free(tbl_path, true); + return ret; } void mesh_path_expire(struct ieee80211_sub_if_data *sdata) @@ -828,14 +1112,10 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) if (node->mpath->sdata != sdata) continue; mpath = node->mpath; - spin_lock_bh(&mpath->state_lock); if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && - time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) { - spin_unlock_bh(&mpath->state_lock); + time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) mesh_path_del(mpath->dst, mpath->sdata); - } else - spin_unlock_bh(&mpath->state_lock); } rcu_read_unlock(); } @@ -843,6 +1123,6 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) void mesh_pathtbl_unregister(void) { /* no need for locking during exit path */ - mesh_table_free(rcu_dereference_raw(mesh_paths), true); - mesh_table_free(rcu_dereference_raw(mpp_paths), true); + mesh_table_free(rcu_dereference_protected(mesh_paths, 1), true); + mesh_table_free(rcu_dereference_protected(mpp_paths, 1), true); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index f4adc0917888..41ef1b476442 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -19,35 +19,18 @@ #define mpl_dbg(fmt, args...) do { (void)(0); } while (0) #endif -#define PLINK_GET_LLID(p) (p + 4) -#define PLINK_GET_PLID(p) (p + 6) +#define PLINK_GET_LLID(p) (p + 2) +#define PLINK_GET_PLID(p) (p + 4) #define mod_plink_timer(s, t) (mod_timer(&s->plink_timer, \ jiffies + HZ * t / 1000)) -/* Peer link cancel reasons, all subject to ANA approval */ -#define MESH_LINK_CANCELLED 2 -#define MESH_MAX_NEIGHBORS 3 -#define MESH_CAPABILITY_POLICY_VIOLATION 4 -#define MESH_CLOSE_RCVD 5 -#define MESH_MAX_RETRIES 6 -#define MESH_CONFIRM_TIMEOUT 7 -#define MESH_SECURITY_ROLE_NEGOTIATION_DIFFERS 8 -#define MESH_SECURITY_AUTHENTICATION_IMPOSSIBLE 9 -#define MESH_SECURITY_FAILED_VERIFICATION 10 - #define dot11MeshMaxRetries(s) (s->u.mesh.mshcfg.dot11MeshMaxRetries) #define dot11MeshRetryTimeout(s) (s->u.mesh.mshcfg.dot11MeshRetryTimeout) #define dot11MeshConfirmTimeout(s) (s->u.mesh.mshcfg.dot11MeshConfirmTimeout) #define dot11MeshHoldingTimeout(s) (s->u.mesh.mshcfg.dot11MeshHoldingTimeout) #define dot11MeshMaxPeerLinks(s) (s->u.mesh.mshcfg.dot11MeshMaxPeerLinks) -enum plink_frame_type { - PLINK_OPEN = 1, - PLINK_CONFIRM, - PLINK_CLOSE -}; - enum plink_event { PLINK_UNDEFINED, OPN_ACPT, @@ -60,6 +43,10 @@ enum plink_event { CLS_IGNR }; +static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason); + static inline void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { @@ -93,11 +80,15 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta) * on it in the lifecycle management section! */ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, - u8 *hw_addr, u32 rates) + u8 *hw_addr, u32 rates, + struct ieee802_11_elems *elems) { struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; struct sta_info *sta; + sband = local->hw.wiphy->bands[local->oper_channel->band]; + if (local->num_sta >= MESH_MAX_PLINKS) return NULL; @@ -105,8 +96,17 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, if (!sta) return NULL; - sta->flags = WLAN_STA_AUTHORIZED | WLAN_STA_AUTH; + sta_info_move_state(sta, IEEE80211_STA_AUTH); + sta_info_move_state(sta, IEEE80211_STA_ASSOC); + sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); + + set_sta_flag(sta, WLAN_STA_WME); + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; + if (elems->ht_cap_elem) + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + elems->ht_cap_elem, + &sta->sta.ht_cap); rate_control_rate_init(sta); return sta; @@ -150,6 +150,10 @@ void mesh_plink_deactivate(struct sta_info *sta) spin_lock_bh(&sta->lock); deactivated = __mesh_plink_deactivate(sta); + sta->reason = cpu_to_le16(WLAN_REASON_MESH_PEER_CANCELED); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, sta->llid, sta->plid, + sta->reason); spin_unlock_bh(&sta->lock); if (deactivated) @@ -157,82 +161,109 @@ void mesh_plink_deactivate(struct sta_info *sta) } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, - enum plink_frame_type action, u8 *da, __le16 llid, __le16 plid, - __le16 reason) { + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason) { struct ieee80211_local *local = sdata->local; - struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + - sdata->u.mesh.ie_len); + struct sk_buff *skb; struct ieee80211_mgmt *mgmt; bool include_plid = false; - static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A }; - u8 *pos; - int ie_len; - + u16 peering_proto = 0; + u8 *pos, ie_len = 4; + int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) + + sizeof(mgmt->u.action.u.self_prot); + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + hdr_len + + 2 + /* capability info */ + 2 + /* AID */ + 2 + 8 + /* supported rates */ + 2 + (IEEE80211_MAX_SUPP_RATES - 8) + + 2 + sdata->u.mesh.mesh_id_len + + 2 + sizeof(struct ieee80211_meshconf_ie) + + 2 + sizeof(struct ieee80211_ht_cap) + + 2 + sizeof(struct ieee80211_ht_info) + + 2 + 8 + /* peering IE */ + sdata->u.mesh.ie_len); if (!skb) return -1; skb_reserve(skb, local->hw.extra_tx_headroom); - /* 25 is the size of the common mgmt part (24) plus the size of the - * common action part (1) - */ - mgmt = (struct ieee80211_mgmt *) - skb_put(skb, 25 + sizeof(mgmt->u.action.u.plink_action)); - memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.plink_action)); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); + memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); - mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; - mgmt->u.action.u.plink_action.action_code = action; - - if (action == PLINK_CLOSE) - mgmt->u.action.u.plink_action.aux = reason; - else { - mgmt->u.action.u.plink_action.aux = cpu_to_le16(0x0); - if (action == PLINK_CONFIRM) { - pos = skb_put(skb, 4); - /* two-byte status code followed by two-byte AID */ - memset(pos, 0, 2); + mgmt->u.action.category = WLAN_CATEGORY_SELF_PROTECTED; + mgmt->u.action.u.self_prot.action_code = action; + + if (action != WLAN_SP_MESH_PEERING_CLOSE) { + /* capability info */ + pos = skb_put(skb, 2); + memset(pos, 0, 2); + if (action == WLAN_SP_MESH_PEERING_CONFIRM) { + /* AID */ + pos = skb_put(skb, 2); memcpy(pos + 2, &plid, 2); } - mesh_mgmt_ies_add(skb, sdata); + if (ieee80211_add_srates_ie(&sdata->vif, skb) || + ieee80211_add_ext_srates_ie(&sdata->vif, skb) || + mesh_add_rsn_ie(skb, sdata) || + mesh_add_meshid_ie(skb, sdata) || + mesh_add_meshconf_ie(skb, sdata)) + return -1; + } else { /* WLAN_SP_MESH_PEERING_CLOSE */ + if (mesh_add_meshid_ie(skb, sdata)) + return -1; } - /* Add Peer Link Management element */ + /* Add Mesh Peering Management element */ switch (action) { - case PLINK_OPEN: - ie_len = 6; + case WLAN_SP_MESH_PEERING_OPEN: break; - case PLINK_CONFIRM: - ie_len = 8; + case WLAN_SP_MESH_PEERING_CONFIRM: + ie_len += 2; include_plid = true; break; - case PLINK_CLOSE: - default: - if (!plid) - ie_len = 8; - else { - ie_len = 10; + case WLAN_SP_MESH_PEERING_CLOSE: + if (plid) { + ie_len += 2; include_plid = true; } + ie_len += 2; /* reason code */ break; + default: + return -EINVAL; } + if (WARN_ON(skb_tailroom(skb) < 2 + ie_len)) + return -ENOMEM; + pos = skb_put(skb, 2 + ie_len); - *pos++ = WLAN_EID_PEER_LINK; + *pos++ = WLAN_EID_PEER_MGMT; *pos++ = ie_len; - memcpy(pos, meshpeeringproto, sizeof(meshpeeringproto)); - pos += 4; + memcpy(pos, &peering_proto, 2); + pos += 2; memcpy(pos, &llid, 2); + pos += 2; if (include_plid) { - pos += 2; memcpy(pos, &plid, 2); - } - if (action == PLINK_CLOSE) { pos += 2; + } + if (action == WLAN_SP_MESH_PEERING_CLOSE) { memcpy(pos, &reason, 2); + pos += 2; } + if (action != WLAN_SP_MESH_PEERING_CLOSE) { + if (mesh_add_ht_cap_ie(skb, sdata) || + mesh_add_ht_info_ie(skb, sdata)) + return -1; + } + + if (mesh_add_vendor_ies(skb, sdata)) + return -1; + ieee80211_tx_skb(sdata, skb); return 0; } @@ -256,7 +287,7 @@ void mesh_neighbour_update(u8 *hw_addr, u32 rates, elems->ie_start, elems->total_len, GFP_KERNEL); else - sta = mesh_plink_alloc(sdata, hw_addr, rates); + sta = mesh_plink_alloc(sdata, hw_addr, rates, elems); if (!sta) return; if (sta_info_insert_rcu(sta)) { @@ -322,21 +353,21 @@ static void mesh_plink_timer(unsigned long data) ++sta->plink_retries; mod_plink_timer(sta, sta->plink_timeout); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, - 0, 0); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, + sta->sta.addr, llid, 0, 0); break; } - reason = cpu_to_le16(MESH_MAX_RETRIES); + reason = cpu_to_le16(WLAN_REASON_MESH_MAX_RETRIES); /* fall through on else */ case NL80211_PLINK_CNF_RCVD: /* confirm timer */ if (!reason) - reason = cpu_to_le16(MESH_CONFIRM_TIMEOUT); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIRM_TIMEOUT); sta->plink_state = NL80211_PLINK_HOLDING; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, - reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case NL80211_PLINK_HOLDING: /* holding timer */ @@ -380,7 +411,7 @@ int mesh_plink_open(struct sta_info *sta) __le16 llid; struct ieee80211_sub_if_data *sdata = sta->sdata; - if (!test_sta_flags(sta, WLAN_STA_AUTH)) + if (!test_sta_flag(sta, WLAN_STA_AUTH)) return -EPERM; spin_lock_bh(&sta->lock); @@ -396,7 +427,7 @@ int mesh_plink_open(struct sta_info *sta) mpl_dbg("Mesh plink: starting establishment with %pM\n", sta->sta.addr); - return mesh_plink_frame_tx(sdata, PLINK_OPEN, + return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, sta->sta.addr, llid, 0, 0); } @@ -422,7 +453,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m struct ieee802_11_elems elems; struct sta_info *sta; enum plink_event event; - enum plink_frame_type ftype; + enum ieee80211_self_protected_actioncode ftype; size_t baselen; bool deactivated, matches_local = true; u8 ie_len; @@ -449,14 +480,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m return; } - baseaddr = mgmt->u.action.u.plink_action.variable; - baselen = (u8 *) mgmt->u.action.u.plink_action.variable - (u8 *) mgmt; - if (mgmt->u.action.u.plink_action.action_code == PLINK_CONFIRM) { + baseaddr = mgmt->u.action.u.self_prot.variable; + baselen = (u8 *) mgmt->u.action.u.self_prot.variable - (u8 *) mgmt; + if (mgmt->u.action.u.self_prot.action_code == + WLAN_SP_MESH_PEERING_CONFIRM) { baseaddr += 4; baselen += 4; } ieee802_11_parse_elems(baseaddr, len - baselen, &elems); - if (!elems.peer_link) { + if (!elems.peering) { mpl_dbg("Mesh plink: missing necessary peer link ie\n"); return; } @@ -466,37 +498,40 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m return; } - ftype = mgmt->u.action.u.plink_action.action_code; - ie_len = elems.peer_link_len; - if ((ftype == PLINK_OPEN && ie_len != 6) || - (ftype == PLINK_CONFIRM && ie_len != 8) || - (ftype == PLINK_CLOSE && ie_len != 8 && ie_len != 10)) { + ftype = mgmt->u.action.u.self_prot.action_code; + ie_len = elems.peering_len; + if ((ftype == WLAN_SP_MESH_PEERING_OPEN && ie_len != 4) || + (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || + (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 + && ie_len != 8)) { mpl_dbg("Mesh plink: incorrect plink ie length %d %d\n", ftype, ie_len); return; } - if (ftype != PLINK_CLOSE && (!elems.mesh_id || !elems.mesh_config)) { + if (ftype != WLAN_SP_MESH_PEERING_CLOSE && + (!elems.mesh_id || !elems.mesh_config)) { mpl_dbg("Mesh plink: missing necessary ie\n"); return; } /* Note the lines below are correct, the llid in the frame is the plid * from the point of view of this host. */ - memcpy(&plid, PLINK_GET_LLID(elems.peer_link), 2); - if (ftype == PLINK_CONFIRM || (ftype == PLINK_CLOSE && ie_len == 10)) - memcpy(&llid, PLINK_GET_PLID(elems.peer_link), 2); + memcpy(&plid, PLINK_GET_LLID(elems.peering), 2); + if (ftype == WLAN_SP_MESH_PEERING_CONFIRM || + (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) + memcpy(&llid, PLINK_GET_PLID(elems.peering), 2); rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); - if (!sta && ftype != PLINK_OPEN) { + if (!sta && ftype != WLAN_SP_MESH_PEERING_OPEN) { mpl_dbg("Mesh plink: cls or cnf from unknown peer\n"); rcu_read_unlock(); return; } - if (sta && !test_sta_flags(sta, WLAN_STA_AUTH)) { + if (sta && !test_sta_flag(sta, WLAN_STA_AUTH)) { mpl_dbg("Mesh plink: Action frame from non-authed peer\n"); rcu_read_unlock(); return; @@ -509,30 +544,30 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m /* Now we will figure out the appropriate event... */ event = PLINK_UNDEFINED; - if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, sdata))) { + if (ftype != WLAN_SP_MESH_PEERING_CLOSE && + (!mesh_matches_local(&elems, sdata))) { matches_local = false; switch (ftype) { - case PLINK_OPEN: + case WLAN_SP_MESH_PEERING_OPEN: event = OPN_RJCT; break; - case PLINK_CONFIRM: + case WLAN_SP_MESH_PEERING_CONFIRM: event = CNF_RJCT; break; - case PLINK_CLOSE: - /* avoid warning */ + default: break; } } if (!sta && !matches_local) { rcu_read_unlock(); - reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); llid = 0; - mesh_plink_frame_tx(sdata, PLINK_CLOSE, mgmt->sa, llid, - plid, reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + mgmt->sa, llid, plid, reason); return; } else if (!sta) { - /* ftype == PLINK_OPEN */ + /* ftype == WLAN_SP_MESH_PEERING_OPEN */ u32 rates; rcu_read_unlock(); @@ -543,7 +578,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } rates = ieee80211_sta_get_rates(local, &elems, rx_status->band); - sta = mesh_plink_alloc(sdata, mgmt->sa, rates); + sta = mesh_plink_alloc(sdata, mgmt->sa, rates, &elems); if (!sta) { mpl_dbg("Mesh plink error: plink table full\n"); return; @@ -557,21 +592,21 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } else if (matches_local) { spin_lock_bh(&sta->lock); switch (ftype) { - case PLINK_OPEN: + case WLAN_SP_MESH_PEERING_OPEN: if (!mesh_plink_free_count(sdata) || (sta->plid && sta->plid != plid)) event = OPN_IGNR; else event = OPN_ACPT; break; - case PLINK_CONFIRM: + case WLAN_SP_MESH_PEERING_CONFIRM: if (!mesh_plink_free_count(sdata) || (sta->llid != llid || sta->plid != plid)) event = CNF_IGNR; else event = CNF_ACPT; break; - case PLINK_CLOSE: + case WLAN_SP_MESH_PEERING_CLOSE: if (sta->plink_state == NL80211_PLINK_ESTAB) /* Do not check for llid or plid. This does not * follow the standard but since multiple plinks @@ -620,10 +655,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->llid = llid; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, - 0, 0); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, - llid, plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_OPEN, + sta->sta.addr, llid, 0, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; default: spin_unlock_bh(&sta->lock); @@ -635,10 +672,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: if (!reason) - reason = cpu_to_le16(MESH_CLOSE_RCVD); + reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, @@ -647,8 +684,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: /* retry timer is left untouched */ @@ -656,8 +694,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->plid = plid; llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, - plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: sta->plink_state = NL80211_PLINK_CNF_RCVD; @@ -677,10 +716,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: if (!reason) - reason = cpu_to_le16(MESH_CLOSE_RCVD); + reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, @@ -689,14 +728,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, - plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: del_timer(&sta->plink_timer); @@ -717,10 +757,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: if (!reason) - reason = cpu_to_le16(MESH_CLOSE_RCVD); + reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, @@ -729,8 +769,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: del_timer(&sta->plink_timer); @@ -740,8 +781,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); mpl_dbg("Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, - plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; default: spin_unlock_bh(&sta->lock); @@ -752,7 +794,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case NL80211_PLINK_ESTAB: switch (event) { case CLS_ACPT: - reason = cpu_to_le16(MESH_CLOSE_RCVD); + reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; deactivated = __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_HOLDING; @@ -761,14 +803,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m spin_unlock_bh(&sta->lock); if (deactivated) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, - plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; default: spin_unlock_bh(&sta->lock); @@ -790,8 +833,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; reason = sta->reason; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, - llid, plid, reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; default: spin_unlock_bh(&sta->lock); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d6470c7fd6ce..ecb4c84c1bb3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -16,10 +16,12 @@ #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/etherdevice.h> +#include <linux/moduleparam.h> #include <linux/rtnetlink.h> -#include <linux/pm_qos_params.h> +#include <linux/pm_qos.h> #include <linux/crc32.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/mac80211.h> #include <asm/unaligned.h> @@ -160,7 +162,8 @@ static int ecw2cw(int ecw) */ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, struct ieee80211_ht_info *hti, - const u8 *bssid, u16 ap_ht_cap_flags) + const u8 *bssid, u16 ap_ht_cap_flags, + bool beacon_htcap_ie) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -206,6 +209,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, channel_type = NL80211_CHAN_HT20; if (!(ap_ht_cap_flags & IEEE80211_HT_CAP_40MHZ_INTOLERANT) && + !ieee80111_cfg_override_disables_ht40(sdata) && (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) && (hti->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) { switch(hti->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { @@ -232,6 +236,21 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type)); } + if (beacon_htcap_ie && (prev_chantype != channel_type)) { + /* + * Whenever the AP announces the HT mode change that can be + * 40MHz intolerant or etc., it would be safer to stop tx + * queues before doing hw config to avoid buffer overflow. + */ + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CHTYPE_CHANGE); + + /* flush out all packets */ + synchronize_net(); + + drv_flush(local, false); + } + /* channel_type change automatically detected */ ieee80211_hw_config(local, 0); @@ -243,6 +262,10 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, IEEE80211_RC_HT_CHANGED, channel_type); rcu_read_unlock(); + + if (beacon_htcap_ie) + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CHTYPE_CHANGE); } ht_opmode = le16_to_cpu(hti->operation_mode); @@ -271,11 +294,9 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for " - "deauth/disassoc frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); @@ -330,6 +351,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, { struct sk_buff *skb; struct ieee80211_hdr_3addr *nullfunc; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); if (!skb) @@ -340,6 +362,10 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, nullfunc->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | + IEEE80211_STA_CONNECTION_POLL)) + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE; + ieee80211_tx_skb(sdata, skb); } @@ -354,11 +380,9 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, return; skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for 4addr " - "nullfunc frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = (struct ieee80211_hdr *) skb_put(skb, 30); @@ -394,6 +418,9 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* call "hw_config" only if doing sw channel switch */ ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL); + } else { + /* update the device channel directly */ + sdata->local->hw.conf.channel = sdata->local->oper_channel; } /* XXX: shouldn't really modify cfg80211-owned data! */ @@ -608,11 +635,14 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *mgd = &sdata->u.mgd; struct sta_info *sta = NULL; - u32 sta_flags = 0; + bool authorized = false; if (!mgd->powersave) return false; + if (mgd->broken_ap) + return false; + if (!mgd->associated) return false; @@ -626,13 +656,10 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); sta = sta_info_get(sdata, mgd->bssid); if (sta) - sta_flags = get_sta_flags(sta); + authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); rcu_read_unlock(); - if (!(sta_flags & WLAN_STA_AUTHORIZED)) - return false; - - return true; + return authorized; } /* need to hold RTNL or interface lock */ @@ -792,7 +819,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) } if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && - (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) { + !(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { netif_tx_stop_all_queues(sdata->dev); if (drv_tx_frames_pending(local)) @@ -917,8 +944,8 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, params.aifs, params.cw_min, params.cw_max, params.txop, params.uapsd); #endif - local->tx_conf[queue] = params; - if (drv_conf_tx(local, queue, ¶ms)) + sdata->tx_conf[queue] = params; + if (drv_conf_tx(local, sdata, queue, ¶ms)) wiphy_debug(local->hw.wiphy, "failed to set TX queue parameters for queue %d\n", queue); @@ -1076,7 +1103,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, bssid); if (sta) { - set_sta_flags(sta, WLAN_STA_BLOCK_BA); + set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, tx); } mutex_unlock(&local->sta_mtx); @@ -1094,6 +1121,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* on the next assoc, re-program HT parameters */ sdata->ht_opmode_valid = false; + memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa)); + memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); local->power_constr_level = 0; @@ -1118,8 +1147,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT; ieee80211_bss_info_change_notify(sdata, changed); + /* remove AP and TDLS peers */ if (remove_sta) - sta_info_destroy_addr(sdata, bssid); + sta_info_flush(local, sdata); del_timer_sync(&sdata->u.mgd.conn_mon_timer); del_timer_sync(&sdata->u.mgd.bcn_mon_timer); @@ -1220,7 +1250,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) } else { ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0, - (u32) -1, true); + (u32) -1, true, false); } ifmgd->probe_send_count++; @@ -1332,9 +1362,6 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) ieee80211_set_disassoc(sdata, true, true); mutex_unlock(&ifmgd->mtx); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. @@ -1343,6 +1370,10 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, NULL, true); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); } void ieee80211_beacon_connection_loss_work(struct work_struct *work) @@ -1350,6 +1381,16 @@ void ieee80211_beacon_connection_loss_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, u.mgd.beacon_connection_loss_work); + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct sta_info *sta; + + if (ifmgd->associated) { + rcu_read_lock(); + sta = sta_info_get(sdata, ifmgd->bssid); + if (sta) + sta->beacon_loss_count++; + rcu_read_unlock(); + } if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) __ieee80211_connection_loss(sdata); @@ -1441,6 +1482,47 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, return RX_MGMT_CFG80211_DISASSOC; } +static void ieee80211_get_rates(struct ieee80211_supported_band *sband, + u8 *supp_rates, unsigned int supp_rates_len, + u32 *rates, u32 *basic_rates, + bool *have_higher_than_11mbit, + int *min_rate, int *min_rate_index) +{ + int i, j; + + for (i = 0; i < supp_rates_len; i++) { + int rate = (supp_rates[i] & 0x7f) * 5; + bool is_basic = !!(supp_rates[i] & 0x80); + + if (rate > 110) + *have_higher_than_11mbit = true; + + /* + * BSS_MEMBERSHIP_SELECTOR_HT_PHY is defined in 802.11n-2009 + * 7.3.2.2 as a magic value instead of a rate. Hence, skip it. + * + * Note: Even through the membership selector and the basic + * rate flag share the same bit, they are not exactly + * the same. + */ + if (!!(supp_rates[i] & 0x80) && + (supp_rates[i] & 0x7f) == BSS_MEMBERSHIP_SELECTOR_HT_PHY) + continue; + + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].bitrate == rate) { + *rates |= BIT(j); + if (is_basic) + *basic_rates |= BIT(j); + if (rate < *min_rate) { + *min_rate = rate; + *min_rate_index = j; + } + break; + } + } + } +} static bool ieee80211_assoc_success(struct ieee80211_work *wk, struct ieee80211_mgmt *mgmt, size_t len) @@ -1457,9 +1539,10 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, struct ieee802_11_elems elems; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; u32 changed = 0; - int i, j, err; + int err; bool have_higher_than_11mbit = false; u16 ap_ht_cap_flags; + int min_rate = INT_MAX, min_rate_index = -1; /* AssocResp and ReassocResp have identical structure */ @@ -1467,10 +1550,21 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) - printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " - "set\n", sdata->name, aid); + printk(KERN_DEBUG + "%s: invalid AID value 0x%x; bits 15:14 not set\n", + sdata->name, aid); aid &= ~(BIT(15) | BIT(14)); + ifmgd->broken_ap = false; + + if (aid == 0 || aid > IEEE80211_MAX_AID) { + printk(KERN_DEBUG + "%s: invalid AID value %d (out of range), turn off PS\n", + sdata->name, aid); + aid = 0; + ifmgd->broken_ap = true; + } + pos = mgmt->u.assoc_resp.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); @@ -1482,54 +1576,43 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, ifmgd->aid = aid; - sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL); - if (!sta) { - printk(KERN_DEBUG "%s: failed to alloc STA entry for" - " the AP\n", sdata->name); + mutex_lock(&sdata->local->sta_mtx); + /* + * station info was already allocated and inserted before + * the association and should be available to us + */ + sta = sta_info_get_rx(sdata, cbss->bssid); + if (WARN_ON(!sta)) { + mutex_unlock(&sdata->local->sta_mtx); return false; } - set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | - WLAN_STA_ASSOC_AP); + sta_info_move_state(sta, IEEE80211_STA_AUTH); + sta_info_move_state(sta, IEEE80211_STA_ASSOC); if (!(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) - set_sta_flags(sta, WLAN_STA_AUTHORIZED); + sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); rates = 0; basic_rates = 0; sband = local->hw.wiphy->bands[wk->chan->band]; - for (i = 0; i < elems.supp_rates_len; i++) { - int rate = (elems.supp_rates[i] & 0x7f) * 5; - bool is_basic = !!(elems.supp_rates[i] & 0x80); + ieee80211_get_rates(sband, elems.supp_rates, elems.supp_rates_len, + &rates, &basic_rates, &have_higher_than_11mbit, + &min_rate, &min_rate_index); - if (rate > 110) - have_higher_than_11mbit = true; + ieee80211_get_rates(sband, elems.ext_supp_rates, + elems.ext_supp_rates_len, &rates, &basic_rates, + &have_higher_than_11mbit, + &min_rate, &min_rate_index); - for (j = 0; j < sband->n_bitrates; j++) { - if (sband->bitrates[j].bitrate == rate) { - rates |= BIT(j); - if (is_basic) - basic_rates |= BIT(j); - break; - } - } - } - - for (i = 0; i < elems.ext_supp_rates_len; i++) { - int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; - bool is_basic = !!(elems.ext_supp_rates[i] & 0x80); - - if (rate > 110) - have_higher_than_11mbit = true; - - for (j = 0; j < sband->n_bitrates; j++) { - if (sband->bitrates[j].bitrate == rate) { - rates |= BIT(j); - if (is_basic) - basic_rates |= BIT(j); - break; - } - } + /* + * some buggy APs don't advertise basic_rates. use the lowest + * supported rate instead. + */ + if (unlikely(!basic_rates) && min_rate_index >= 0) { + printk(KERN_DEBUG "%s: No basic rates in AssocResp. " + "Using min supported rate instead.\n", sdata->name); + basic_rates = BIT(min_rate_index); } sta->sta.supp_rates[wk->chan->band] = rates; @@ -1543,7 +1626,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) - ieee80211_ht_cap_ie_to_sta_ht_cap(sband, + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, elems.ht_cap_elem, &sta->sta.ht_cap); ap_ht_cap_flags = sta->sta.ht_cap.cap; @@ -1551,12 +1634,13 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, rate_control_rate_init(sta); if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) - set_sta_flags(sta, WLAN_STA_MFP); + set_sta_flag(sta, WLAN_STA_MFP); if (elems.wmm_param) - set_sta_flags(sta, WLAN_STA_WME); + set_sta_flag(sta, WLAN_STA_WME); - err = sta_info_insert(sta); + /* sta_info_reinsert will also unlock the mutex lock */ + err = sta_info_reinsert(sta); sta = NULL; if (err) { printk(KERN_DEBUG "%s: failed to insert STA entry for" @@ -1584,7 +1668,8 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, (sdata->local->hw.queues >= 4) && !(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, - cbss->bssid, ap_ht_cap_flags); + cbss->bssid, ap_ht_cap_flags, + false); /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ @@ -1910,7 +1995,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - ieee80211_ht_cap_ie_to_sta_ht_cap(sband, + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, elems.ht_cap_elem, &sta->sta.ht_cap); ap_ht_cap_flags = sta->sta.ht_cap.cap; @@ -1918,7 +2003,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, - bssid, ap_ht_cap_flags); + bssid, ap_ht_cap_flags, true); } /* Note: country IE parsing is done for us by cfg80211 */ @@ -2064,9 +2149,6 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, true, true); mutex_unlock(&ifmgd->mtx); - mutex_lock(&local->mtx); - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. @@ -2074,6 +2156,11 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, reason, NULL, true); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); + mutex_lock(&ifmgd->mtx); } @@ -2224,6 +2311,7 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->request_smps_work); + cancel_work_sync(&ifmgd->monitor_work); cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -2232,7 +2320,6 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) if (del_timer_sync(&ifmgd->chswitch_timer)) set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running); - cancel_work_sync(&ifmgd->monitor_work); /* these will just be re-established on connection */ del_timer_sync(&ifmgd->conn_mon_timer); del_timer_sync(&ifmgd->bcn_mon_timer); @@ -2294,6 +2381,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) (unsigned long) sdata); ifmgd->flags = 0; + ifmgd->powersave = sdata->wdev.ps; mutex_init(&ifmgd->mtx); @@ -2429,6 +2517,29 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, return 0; } +/* create and insert a dummy station entry */ +static int ieee80211_pre_assoc(struct ieee80211_sub_if_data *sdata, + u8 *bssid) { + struct sta_info *sta; + int err; + + sta = sta_info_alloc(sdata, bssid, GFP_KERNEL); + if (!sta) + return -ENOMEM; + + sta->dummy = true; + + err = sta_info_insert(sta); + sta = NULL; + if (err) { + printk(KERN_DEBUG "%s: failed to insert Dummy STA entry for" + " the AP (error %d)\n", sdata->name, err); + return err; + } + + return 0; +} + static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, struct sk_buff *skb) { @@ -2436,9 +2547,11 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, struct ieee80211_mgmt *mgmt; struct ieee80211_rx_status *rx_status; struct ieee802_11_elems elems; + struct cfg80211_bss *cbss = wk->assoc.bss; u16 status; if (!skb) { + sta_info_destroy_addr(wk->sdata, cbss->bssid); cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); goto destroy; } @@ -2468,12 +2581,16 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, if (!ieee80211_assoc_success(wk, mgmt, skb->len)) { mutex_unlock(&wk->sdata->u.mgd.mtx); /* oops -- internal error -- send timeout for now */ + sta_info_destroy_addr(wk->sdata, cbss->bssid); cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); return WORK_DONE_DESTROY; } mutex_unlock(&wk->sdata->u.mgd.mtx); + } else { + /* assoc failed - destroy the dummy station entry */ + sta_info_destroy_addr(wk->sdata, cbss->bssid); } cfg80211_send_rx_assoc(wk->sdata->dev, skb->data, skb->len); @@ -2492,7 +2609,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss = (void *)req->bss->priv; struct ieee80211_work *wk; const u8 *ssid; - int i; + int i, err; mutex_lock(&ifmgd->mtx); if (ifmgd->associated) { @@ -2517,6 +2634,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (!wk) return -ENOMEM; + /* + * create a dummy station info entry in order + * to start accepting incoming EAPOL packets from the station + */ + err = ieee80211_pre_assoc(sdata, req->bss->bssid); + if (err) { + kfree(wk); + return err; + } + ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; @@ -2529,6 +2656,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_11N; + if (req->flags & ASSOC_REQ_DISABLE_HT) + ifmgd->flags |= IEEE80211_STA_DISABLE_11N; + + memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa)); + memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask, + sizeof(ifmgd->ht_capa_mask)); + if (req->ie && req->ie_len) { memcpy(wk->ie, req->ie, req->ie_len); wk->ie_len = req->ie_len; @@ -2674,7 +2808,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, cookie, !req->local_state_change); if (assoc_bss) - sta_info_destroy_addr(sdata, bssid); + sta_info_flush(sdata->local, sdata); mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); @@ -2714,7 +2848,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_send_deauth_disassoc(sdata, req->bss->bssid, IEEE80211_STYPE_DISASSOC, req->reason_code, cookie, !req->local_state_change); - sta_info_destroy_addr(sdata, bssid); + sta_info_flush(sdata->local, sdata); mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 13427b194ced..f054e94901a2 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -12,6 +12,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-trace.h" @@ -137,31 +138,16 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); } -void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, - bool tell_ap) -{ - struct ieee80211_sub_if_data *sdata; - - mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) - continue; - - if (sdata->vif.type == NL80211_IFTYPE_STATION && - sdata->u.mgd.associated) - ieee80211_offchannel_ps_enable(sdata, tell_ap); - } - mutex_unlock(&local->iflist_mtx); -} - void ieee80211_offchannel_return(struct ieee80211_local *local, - bool enable_beaconing, bool offchannel_ps_disable) { struct ieee80211_sub_if_data *sdata; mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) + clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); + if (!ieee80211_sdata_running(sdata)) continue; @@ -173,7 +159,6 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, } if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { - clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); /* * This may wake up queues even though the driver * currently has them stopped. This is not very @@ -187,11 +172,9 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, netif_tx_wake_all_queues(sdata->dev); } - /* Check to see if we should re-enable beaconing */ - if (enable_beaconing && - (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT)) + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_ADHOC || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); } @@ -211,8 +194,6 @@ static void ieee80211_hw_roc_start(struct work_struct *work) return; } - ieee80211_recalc_idle(local); - if (local->hw_roc_skb) { sdata = IEEE80211_DEV_TO_SUB_IF(local->hw_roc_dev); ieee80211_tx_skb(sdata, local->hw_roc_skb); @@ -226,6 +207,8 @@ static void ieee80211_hw_roc_start(struct work_struct *work) GFP_KERNEL); } + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 6326d3439861..596efaf50e09 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -42,7 +42,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { - set_sta_flags(sta, WLAN_STA_BLOCK_BA); + set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, true); } mutex_unlock(&local->sta_mtx); @@ -125,7 +125,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - drv_remove_interface(local, &sdata->vif); + drv_remove_interface(local, sdata); } /* stop hardware - this must stop RX */ diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 3d5a2cb835c4..5a5a7767d541 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/rtnetlink.h> #include <linux/slab.h> +#include <linux/module.h> #include "rate.h" #include "ieee80211_i.h" #include "debugfs.h" @@ -199,7 +200,7 @@ static void rate_control_release(struct kref *kref) kfree(ctrl_ref); } -static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) +static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc) { struct sk_buff *skb = txrc->skb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -208,7 +209,9 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) fc = hdr->frame_control; - return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc); + return (info->flags & (IEEE80211_TX_CTL_NO_ACK | + IEEE80211_TX_CTL_USE_MINRATE)) || + !ieee80211_is_data(fc); } static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, @@ -233,6 +236,27 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, /* could not find a basic rate; use original selection */ } +static inline s8 +rate_lowest_non_cck_index(struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta) +{ + int i; + + for (i = 0; i < sband->n_bitrates; i++) { + struct ieee80211_rate *srate = &sband->bitrates[i]; + if ((srate->bitrate == 10) || (srate->bitrate == 20) || + (srate->bitrate == 55) || (srate->bitrate == 110)) + continue; + + if (rate_supported(sta, sband->band, i)) + return i; + } + + /* No matching rate found */ + return 0; +} + + bool rate_control_send_low(struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) @@ -241,8 +265,14 @@ bool rate_control_send_low(struct ieee80211_sta *sta, struct ieee80211_supported_band *sband = txrc->sband; int mcast_rate; - if (!sta || !priv_sta || rc_no_data_or_no_ack(txrc)) { - info->control.rates[0].idx = rate_lowest_index(txrc->sband, sta); + if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) { + if ((sband->band != IEEE80211_BAND_2GHZ) || + !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) + info->control.rates[0].idx = + rate_lowest_index(txrc->sband, sta); + else + info->control.rates[0].idx = + rate_lowest_non_cck_index(txrc->sband, sta); info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 1 : txrc->hw->max_rate_tries; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 58a89554b788..b39dda523f39 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -334,8 +334,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, static void -calc_rate_durations(struct minstrel_sta_info *mi, struct ieee80211_local *local, - struct minstrel_rate *d, struct ieee80211_rate *rate) +calc_rate_durations(struct ieee80211_local *local, struct minstrel_rate *d, + struct ieee80211_rate *rate) { int erp = !!(rate->flags & IEEE80211_RATE_ERP_G); @@ -402,8 +402,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, mr->rix = i; mr->bitrate = sband->bitrates[i].bitrate / 5; - calc_rate_durations(mi, local, mr, - &sband->bitrates[i]); + calc_rate_durations(local, mr, &sband->bitrates[i]); /* calculate maximum number of retransmissions before * fallback (based on maximum segment size) */ diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index a290ad231d77..d5a56226e675 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -50,6 +50,7 @@ #include <linux/debugfs.h> #include <linux/ieee80211.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/mac80211.h> #include "rc80211_minstrel.h" diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 66a1eeb279c6..ff5f7b84e825 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -36,8 +36,17 @@ /* Transmit duration for the raw data part of an average sized packet */ #define MCS_DURATION(streams, sgi, bps) MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps))) +/* + * Define group sort order: HT40 -> SGI -> #streams + */ +#define GROUP_IDX(_streams, _sgi, _ht40) \ + MINSTREL_MAX_STREAMS * 2 * _ht40 + \ + MINSTREL_MAX_STREAMS * _sgi + \ + _streams - 1 + /* MCS rate information for an MCS group */ -#define MCS_GROUP(_streams, _sgi, _ht40) { \ +#define MCS_GROUP(_streams, _sgi, _ht40) \ + [GROUP_IDX(_streams, _sgi, _ht40)] = { \ .streams = _streams, \ .flags = \ (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ @@ -58,6 +67,9 @@ * To enable sufficiently targeted rate sampling, MCS rates are divided into * groups, based on the number of streams and flags (HT40, SGI) that they * use. + * + * Sortorder has to be fixed for GROUP_IDX macro to be applicable: + * HT40 -> SGI -> #streams */ const struct mcs_group minstrel_mcs_groups[] = { MCS_GROUP(1, 0, 0), @@ -102,21 +114,9 @@ minstrel_ewma(int old, int new, int weight) static int minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) { - int streams = (rate->idx / MCS_GROUP_RATES) + 1; - u32 flags = IEEE80211_TX_RC_SHORT_GI | IEEE80211_TX_RC_40_MHZ_WIDTH; - int i; - - for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { - if (minstrel_mcs_groups[i].streams != streams) - continue; - if (minstrel_mcs_groups[i].flags != (rate->flags & flags)) - continue; - - return i; - } - - WARN_ON(1); - return 0; + return GROUP_IDX((rate->idx / MCS_GROUP_RATES) + 1, + !!(rate->flags & IEEE80211_TX_RC_SHORT_GI), + !!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)); } static inline struct minstrel_rate_stats * @@ -130,7 +130,7 @@ minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) * Recalculate success probabilities and counters for a rate using EWMA */ static void -minstrel_calc_rate_ewma(struct minstrel_priv *mp, struct minstrel_rate_stats *mr) +minstrel_calc_rate_ewma(struct minstrel_rate_stats *mr) { if (unlikely(mr->attempts > 0)) { mr->sample_skipped = 0; @@ -156,8 +156,7 @@ minstrel_calc_rate_ewma(struct minstrel_priv *mp, struct minstrel_rate_stats *mr * the expected number of retransmissions and their expected length */ static void -minstrel_ht_calc_tp(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, - int group, int rate) +minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) { struct minstrel_rate_stats *mr; unsigned int usecs; @@ -226,8 +225,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mr = &mg->rates[i]; mr->retry_updated = false; index = MCS_GROUP_RATES * group + i; - minstrel_calc_rate_ewma(mp, mr); - minstrel_ht_calc_tp(mp, mi, group, i); + minstrel_calc_rate_ewma(mr); + minstrel_ht_calc_tp(mi, group, i); if (!mr->cur_tp) continue; @@ -281,6 +280,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mr = minstrel_get_ratestats(mi, mg->max_tp_rate); if (cur_tp < mr->cur_tp) { + mi->max_tp_rate2 = mi->max_tp_rate; + cur_tp2 = cur_tp; mi->max_tp_rate = mg->max_tp_rate; cur_tp = mr->cur_tp; } @@ -298,10 +299,10 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) static bool minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) { - if (!rate->count) + if (rate->idx < 0) return false; - if (rate->idx < 0) + if (!rate->count) return false; return !!(rate->flags & IEEE80211_TX_RC_MCS); @@ -355,7 +356,7 @@ minstrel_downgrade_rate(struct minstrel_ht_sta *mi, unsigned int *idx, } static void -minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, struct sk_buff *skb) +minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct sta_info *sta = container_of(pubsta, struct sta_info, sta); @@ -452,7 +453,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { minstrel_ht_update_stats(mp, mi); - minstrel_aggr_check(mp, sta, skb); + if (!(info->flags & IEEE80211_TX_CTL_AMPDU)) + minstrel_aggr_check(sta, skb); } } @@ -512,7 +514,6 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, static void minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_tx_rate *rate, int index, - struct ieee80211_tx_rate_control *txrc, bool sample, bool rtscts) { const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; @@ -608,7 +609,13 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); info->flags |= mi->tx_flags; - sample_idx = minstrel_get_sample_rate(mp, mi); + + /* Don't use EAPOL frames for sampling on non-mrr hw */ + if (mp->hw->max_rates == 1 && + txrc->skb->protocol == cpu_to_be16(ETH_P_PAE)) + sample_idx = -1; + else + sample_idx = minstrel_get_sample_rate(mp, mi); #ifdef CONFIG_MAC80211_DEBUGFS /* use fixed index if set */ @@ -619,11 +626,11 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, if (sample_idx >= 0) { sample = true; minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, - txrc, true, false); + true, false); info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; } else { minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, - txrc, false, false); + false, false); } if (mp->hw->max_rates >= 3) { @@ -634,13 +641,13 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, */ if (sample_idx >= 0) minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, - txrc, false, false); + false, false); else minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, - txrc, false, true); + false, true); minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, - txrc, false, !sample); + false, !sample); ar[3].count = 0; ar[3].idx = -1; @@ -651,7 +658,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, * max_tp_rate -> max_prob_rate by default. */ minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_prob_rate, - txrc, false, !sample); + false, !sample); ar[2].count = 0; ar[2].idx = -1; diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index cefcb5d2dae6..e788f76a1dfe 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -10,6 +10,7 @@ #include <linux/skbuff.h> #include <linux/debugfs.h> #include <linux/ieee80211.h> +#include <linux/export.h> #include <net/mac80211.h> #include "rc80211_minstrel.h" #include "rc80211_minstrel_ht.h" diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index aeda65466f3e..502d3ecc4a79 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -318,7 +318,7 @@ rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, rinfo[i].diff = i * pinfo->norm_offset; } for (i = 1; i < sband->n_bitrates; i++) { - s = 0; + s = false; for (j = 0; j < sband->n_bitrates - i; j++) if (unlikely(sband->bitrates[rinfo[j].index].bitrate > sband->bitrates[rinfo[j + 1].index].bitrate)) { @@ -327,7 +327,7 @@ rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, rinfo[j + 1].index = tmp; rinfo[rinfo[j].index].rev_index = j; rinfo[rinfo[j + 1].index].rev_index = j + 1; - s = 1; + s = true; } if (!s) break; diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index 4851e9e2daed..c97a0657c043 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/skbuff.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/mac80211.h> #include "rate.h" diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index fe2c2a717793..f407427c642f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -16,6 +16,7 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/rcupdate.h> +#include <linux/export.h> #include <net/mac80211.h> #include <net/ieee80211_radiotap.h> @@ -27,6 +28,7 @@ #include "wpa.h" #include "tkip.h" #include "wme.h" +#include "rate.h" /* * monitor mode reception @@ -140,8 +142,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; /* IEEE80211_RADIOTAP_RATE */ - if (status->flag & RX_FLAG_HT) { + if (!rate || status->flag & RX_FLAG_HT) { /* + * Without rate information don't add it. If we have, * MCS information is a separate field in radiotap, * added below. The byte here is needed as padding * for the channel though, so initialise it to 0. @@ -162,12 +165,14 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (status->flag & RX_FLAG_HT) put_unaligned_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ, pos); - else if (rate->flags & IEEE80211_RATE_ERP_G) + else if (rate && rate->flags & IEEE80211_RATE_ERP_G) put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ, pos); - else + else if (rate) put_unaligned_le16(IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ, pos); + else + put_unaligned_le16(IEEE80211_CHAN_2GHZ, pos); pos += 2; /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ @@ -476,7 +481,6 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; - unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control); char *dev_addr = rx->sdata->vif.addr; if (ieee80211_is_data(hdr->frame_control)) { @@ -524,14 +528,6 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) } -#define msh_h_get(h, l) ((struct ieee80211s_hdr *) ((u8 *)h + l)) - - if (ieee80211_is_data(hdr->frame_control) && - is_multicast_ether_addr(hdr->addr1) && - mesh_rmc_check(hdr->addr3, msh_h_get(hdr, hdrlen), rx->sdata)) - return RX_DROP_MONITOR; -#undef msh_h_get - return RX_CONTINUE; } @@ -753,10 +749,11 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) struct ieee80211_local *local = rx->local; struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct sta_info *sta = rx->sta; struct tid_ampdu_rx *tid_agg_rx; u16 sc; - int tid; + u8 tid, ack_policy; if (!ieee80211_is_data_qos(hdr->frame_control)) goto dont_reorder; @@ -769,6 +766,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) if (!sta) goto dont_reorder; + ack_policy = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_ACK_POLICY_MASK; tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); @@ -779,6 +778,15 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) goto dont_reorder; + /* not part of a BA session */ + if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && + ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) + goto dont_reorder; + + /* not actually part of this BA session */ + if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) + goto dont_reorder; + /* new, potentially un-ordered, ampdu frame - process it */ /* reset session timer */ @@ -850,8 +858,28 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) ieee80211_is_pspoll(hdr->frame_control)) && rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && rx->sdata->vif.type != NL80211_IFTYPE_WDS && - (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) + (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC)))) { + if (rx->sta && rx->sta->dummy && + ieee80211_is_data_present(hdr->frame_control)) { + u16 ethertype; + u8 *payload; + + payload = rx->skb->data + + ieee80211_hdrlen(hdr->frame_control); + ethertype = (payload[6] << 8) | payload[7]; + if (cpu_to_be16(ethertype) == + rx->sdata->control_port_protocol) + return RX_CONTINUE; + } + + if (rx->sdata->vif.type == NL80211_IFTYPE_AP && + cfg80211_rx_spurious_frame(rx->sdata->dev, + hdr->addr2, + GFP_ATOMIC)) + return RX_DROP_UNUSABLE; + return RX_DROP_MONITOR; + } return RX_CONTINUE; } @@ -1106,7 +1134,7 @@ static void ap_sta_ps_start(struct sta_info *sta) struct ieee80211_local *local = sdata->local; atomic_inc(&sdata->bss->num_sta_ps); - set_sta_flags(sta, WLAN_STA_PS_STA); + set_sta_flag(sta, WLAN_STA_PS_STA); if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG @@ -1126,7 +1154,7 @@ static void ap_sta_ps_end(struct sta_info *sta) sdata->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - if (test_sta_flags(sta, WLAN_STA_PS_DRIVER)) { + if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d driver-ps-blocked\n", sdata->name, sta->sta.addr, sta->sta.aid); @@ -1145,7 +1173,7 @@ int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start) WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS)); /* Don't let the same PS state be set twice */ - in_ps = test_sta_flags(sta_inf, WLAN_STA_PS_STA); + in_ps = test_sta_flag(sta_inf, WLAN_STA_PS_STA); if ((start && in_ps) || (!start && !in_ps)) return -EINVAL; @@ -1159,6 +1187,81 @@ int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start) EXPORT_SYMBOL(ieee80211_sta_ps_transition); static ieee80211_rx_result debug_noinline +ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) +{ + struct ieee80211_sub_if_data *sdata = rx->sdata; + struct ieee80211_hdr *hdr = (void *)rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + int tid, ac; + + if (!rx->sta || !(status->rx_flags & IEEE80211_RX_RA_MATCH)) + return RX_CONTINUE; + + if (sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN) + return RX_CONTINUE; + + /* + * The device handles station powersave, so don't do anything about + * uAPSD and PS-Poll frames (the latter shouldn't even come up from + * it to mac80211 since they're handled.) + */ + if (sdata->local->hw.flags & IEEE80211_HW_AP_LINK_PS) + return RX_CONTINUE; + + /* + * Don't do anything if the station isn't already asleep. In + * the uAPSD case, the station will probably be marked asleep, + * in the PS-Poll case the station must be confused ... + */ + if (!test_sta_flag(rx->sta, WLAN_STA_PS_STA)) + return RX_CONTINUE; + + if (unlikely(ieee80211_is_pspoll(hdr->frame_control))) { + if (!test_sta_flag(rx->sta, WLAN_STA_SP)) { + if (!test_sta_flag(rx->sta, WLAN_STA_PS_DRIVER)) + ieee80211_sta_ps_deliver_poll_response(rx->sta); + else + set_sta_flag(rx->sta, WLAN_STA_PSPOLL); + } + + /* Free PS Poll skb here instead of returning RX_DROP that would + * count as an dropped frame. */ + dev_kfree_skb(rx->skb); + + return RX_QUEUED; + } else if (!ieee80211_has_morefrags(hdr->frame_control) && + !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && + ieee80211_has_pm(hdr->frame_control) && + (ieee80211_is_data_qos(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control))) { + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + ac = ieee802_1d_to_ac[tid & 7]; + + /* + * If this AC is not trigger-enabled do nothing. + * + * NB: This could/should check a separate bitmap of trigger- + * enabled queues, but for now we only implement uAPSD w/o + * TSPEC changes to the ACs, so they're always the same. + */ + if (!(rx->sta->sta.uapsd_queues & BIT(ac))) + return RX_CONTINUE; + + /* if we are in a service period, do nothing */ + if (test_sta_flag(rx->sta, WLAN_STA_SP)) + return RX_CONTINUE; + + if (!test_sta_flag(rx->sta, WLAN_STA_PS_DRIVER)) + ieee80211_sta_ps_deliver_uapsd(rx->sta); + else + set_sta_flag(rx->sta, WLAN_STA_UAPSD); + } + + return RX_CONTINUE; +} + +static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { struct sta_info *sta = rx->sta; @@ -1216,7 +1319,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { - if (test_sta_flags(sta, WLAN_STA_PS_STA)) { + if (test_sta_flag(sta, WLAN_STA_PS_STA)) { /* * Ignore doze->wake transitions that are * indicated by non-data frames, the standard @@ -1244,15 +1347,20 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) /* * If we receive a 4-addr nullfunc frame from a STA - * that was not moved to a 4-addr STA vlan yet, drop - * the frame to the monitor interface, to make sure - * that hostapd sees it + * that was not moved to a 4-addr STA vlan yet send + * the event to userspace and for older hostapd drop + * the frame to the monitor interface. */ if (ieee80211_has_a4(hdr->frame_control) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || (rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - !rx->sdata->u.vlan.sta))) + !rx->sdata->u.vlan.sta))) { + if (!test_and_set_sta_flag(sta, WLAN_STA_4ADDR_EVENT)) + cfg80211_rx_unexpected_4addr_frame( + rx->sdata->dev, sta->sta.addr, + GFP_ATOMIC); return RX_DROP_MONITOR; + } /* * Update counter and free packet here to avoid * counting this as a dropped packed. @@ -1468,57 +1576,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -static ieee80211_rx_result debug_noinline -ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) -{ - struct ieee80211_sub_if_data *sdata = rx->sdata; - __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - - if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || - !(status->rx_flags & IEEE80211_RX_RA_MATCH))) - return RX_CONTINUE; - - if ((sdata->vif.type != NL80211_IFTYPE_AP) && - (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) - return RX_DROP_UNUSABLE; - - if (!test_sta_flags(rx->sta, WLAN_STA_PS_DRIVER)) - ieee80211_sta_ps_deliver_poll_response(rx->sta); - else - set_sta_flags(rx->sta, WLAN_STA_PSPOLL); - - /* Free PS Poll skb here instead of returning RX_DROP that would - * count as an dropped frame. */ - dev_kfree_skb(rx->skb); - - return RX_QUEUED; -} - -static ieee80211_rx_result debug_noinline -ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx) -{ - u8 *data = rx->skb->data; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)data; - - if (!ieee80211_is_data_qos(hdr->frame_control)) - return RX_CONTINUE; - - /* remove the qos control field, update frame type and meta-data */ - memmove(data + IEEE80211_QOS_CTL_LEN, data, - ieee80211_hdrlen(hdr->frame_control) - IEEE80211_QOS_CTL_LEN); - hdr = (struct ieee80211_hdr *)skb_pull(rx->skb, IEEE80211_QOS_CTL_LEN); - /* change frame type to non QOS */ - hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA); - - return RX_CONTINUE; -} - static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx) { if (unlikely(!rx->sta || - !test_sta_flags(rx->sta, WLAN_STA_AUTHORIZED))) + !test_sta_flag(rx->sta, WLAN_STA_AUTHORIZED))) return -EACCES; return 0; @@ -1561,7 +1623,7 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_DECRYPTED) return 0; - if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) { + if (rx->sta && test_sta_flag(rx->sta, WLAN_STA_MFP)) { if (unlikely(!ieee80211_has_protected(fc) && ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && rx->key)) { @@ -1746,7 +1808,12 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } if (xmit_skb) { - /* send to wireless media */ + /* + * Send to wireless media and increase priority by 256 to + * keep the received priority instead of reclassifying + * the frame (see cfg80211_classify8021d). + */ + xmit_skb->priority += 256; xmit_skb->protocol = htons(ETH_P_802_3); skb_reset_network_header(xmit_skb); skb_reset_mac_header(xmit_skb); @@ -1815,23 +1882,31 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) static ieee80211_rx_result ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr; + struct ieee80211_hdr *fwd_hdr, *hdr; + struct ieee80211_tx_info *info; struct ieee80211s_hdr *mesh_hdr; - unsigned int hdrlen; struct sk_buff *skb = rx->skb, *fwd_skb; struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_NOFORWARD); + u16 q, hdrlen; hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + /* frame is in RMC, don't forward */ + if (ieee80211_is_data(hdr->frame_control) && + is_multicast_ether_addr(hdr->addr1) && + mesh_rmc_check(hdr->addr3, mesh_hdr, rx->sdata)) + return RX_DROP_MONITOR; + if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; if (!mesh_hdr->ttl) - /* illegal frame */ return RX_DROP_MONITOR; if (mesh_hdr->flags & MESH_FLAGS_AE) { @@ -1865,60 +1940,50 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) compare_ether_addr(sdata->vif.addr, hdr->addr3) == 0) return RX_CONTINUE; - mesh_hdr->ttl--; + q = ieee80211_select_queue_80211(local, skb, hdr); + if (ieee80211_queue_stopped(&local->hw, q)) { + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); + return RX_DROP_MONITOR; + } + skb_set_queue_mapping(skb, q); - if (status->rx_flags & IEEE80211_RX_RA_MATCH) { - if (!mesh_hdr->ttl) - IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh, - dropped_frames_ttl); - else { - struct ieee80211_hdr *fwd_hdr; - struct ieee80211_tx_info *info; - - fwd_skb = skb_copy(skb, GFP_ATOMIC); - - if (!fwd_skb && net_ratelimit()) - printk(KERN_DEBUG "%s: failed to clone mesh frame\n", - sdata->name); - if (!fwd_skb) - goto out; - - fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; - memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); - info = IEEE80211_SKB_CB(fwd_skb); - memset(info, 0, sizeof(*info)); - info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - info->control.vif = &rx->sdata->vif; - skb_set_queue_mapping(skb, - ieee80211_select_queue(rx->sdata, fwd_skb)); - ieee80211_set_qos_hdr(local, skb); - if (is_multicast_ether_addr(fwd_hdr->addr1)) - IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.mesh, - fwded_mcast); - else { - int err; - /* - * Save TA to addr1 to send TA a path error if a - * suitable next hop is not found - */ - memcpy(fwd_hdr->addr1, fwd_hdr->addr2, - ETH_ALEN); - err = mesh_nexthop_lookup(fwd_skb, sdata); - /* Failed to immediately resolve next hop: - * fwded frame was dropped or will be added - * later to the pending skb queue. */ - if (err) - return RX_DROP_MONITOR; - - IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.mesh, - fwded_unicast); - } - IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.mesh, - fwded_frames); - ieee80211_add_pending_skb(local, fwd_skb); - } + if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) + goto out; + + if (!--mesh_hdr->ttl) { + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); + return RX_DROP_MONITOR; + } + + fwd_skb = skb_copy(skb, GFP_ATOMIC); + if (!fwd_skb) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: failed to clone mesh frame\n", + sdata->name); + goto out; } + fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; + info = IEEE80211_SKB_CB(fwd_skb); + memset(info, 0, sizeof(*info)); + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + info->control.vif = &rx->sdata->vif; + info->control.jiffies = jiffies; + if (is_multicast_ether_addr(fwd_hdr->addr1)) { + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast); + memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN); + } else if (!mesh_nexthop_lookup(fwd_skb, sdata)) { + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast); + } else { + /* unable to resolve next hop */ + mesh_path_error_tx(ifmsh->mshcfg.element_ttl, fwd_hdr->addr3, + 0, reason, fwd_hdr->addr2, sdata); + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_no_route); + return RX_DROP_MONITOR; + } + + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames); + ieee80211_add_pending_skb(local, fwd_skb); out: if (is_multicast_ether_addr(hdr->addr1) || sdata->dev->flags & IFF_PROMISC) @@ -1946,12 +2011,17 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; /* - * Allow the cooked monitor interface of an AP to see 4-addr frames so - * that a 4-addr station can be detected and moved into a separate VLAN + * Send unexpected-4addr-frame event to hostapd. For older versions, + * also drop the frame to cooked monitor interfaces. */ if (ieee80211_has_a4(hdr->frame_control) && - sdata->vif.type == NL80211_IFTYPE_AP) + sdata->vif.type == NL80211_IFTYPE_AP) { + if (rx->sta && + !test_and_set_sta_flag(rx->sta, WLAN_STA_4ADDR_EVENT)) + cfg80211_rx_unexpected_4addr_frame( + rx->sdata->dev, rx->sta->sta.addr, GFP_ATOMIC); return RX_DROP_MONITOR; + } err = __ieee80211_data_to_8023(rx, &port_control); if (unlikely(err)) @@ -2106,6 +2176,18 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) if (!ieee80211_is_mgmt(mgmt->frame_control)) return RX_DROP_MONITOR; + if (rx->sdata->vif.type == NL80211_IFTYPE_AP && + ieee80211_is_beacon(mgmt->frame_control) && + !(rx->flags & IEEE80211_RX_BEACON_REPORTED)) { + struct ieee80211_rx_status *status; + + status = IEEE80211_SKB_RXCB(rx->skb); + cfg80211_report_obss_beacon(rx->local->hw.wiphy, + rx->skb->data, rx->skb->len, + status->freq, GFP_ATOMIC); + rx->flags |= IEEE80211_RX_BEACON_REPORTED; + } + if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; @@ -2138,16 +2220,69 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; switch (mgmt->u.action.category) { + case WLAN_CATEGORY_HT: + /* reject HT action frames from stations not supporting HT */ + if (!rx->sta->sta.ht_cap.ht_supported) + goto invalid; + + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + break; + + /* verify action & smps_control are present */ + if (len < IEEE80211_MIN_ACTION_SIZE + 2) + goto invalid; + + switch (mgmt->u.action.u.ht_smps.action) { + case WLAN_HT_ACTION_SMPS: { + struct ieee80211_supported_band *sband; + u8 smps; + + /* convert to HT capability */ + switch (mgmt->u.action.u.ht_smps.smps_control) { + case WLAN_HT_SMPS_CONTROL_DISABLED: + smps = WLAN_HT_CAP_SM_PS_DISABLED; + break; + case WLAN_HT_SMPS_CONTROL_STATIC: + smps = WLAN_HT_CAP_SM_PS_STATIC; + break; + case WLAN_HT_SMPS_CONTROL_DYNAMIC: + smps = WLAN_HT_CAP_SM_PS_DYNAMIC; + break; + default: + goto invalid; + } + smps <<= IEEE80211_HT_CAP_SM_PS_SHIFT; + + /* if no change do nothing */ + if ((rx->sta->sta.ht_cap.cap & + IEEE80211_HT_CAP_SM_PS) == smps) + goto handled; + + rx->sta->sta.ht_cap.cap &= ~IEEE80211_HT_CAP_SM_PS; + rx->sta->sta.ht_cap.cap |= smps; + + sband = rx->local->hw.wiphy->bands[status->band]; + + rate_control_rate_update(local, sband, rx->sta, + IEEE80211_RC_SMPS_CHANGED, + local->_oper_channel_type); + goto handled; + } + default: + goto invalid; + } + + break; case WLAN_CATEGORY_BACK: - /* - * The aggregation code is not prepared to handle - * anything but STA/AP due to the BSSID handling; - * IBSS could work in the code but isn't supported - * by drivers or the standard. - */ if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sdata->vif.type != NL80211_IFTYPE_AP) + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC) break; /* verify action_code is present */ @@ -2220,12 +2355,29 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; } break; + case WLAN_CATEGORY_SELF_PROTECTED: + switch (mgmt->u.action.u.self_prot.action_code) { + case WLAN_SP_MESH_PEERING_OPEN: + case WLAN_SP_MESH_PEERING_CLOSE: + case WLAN_SP_MESH_PEERING_CONFIRM: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + goto invalid; + if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) + /* userspace handles this frame */ + break; + goto queue; + case WLAN_SP_MGK_INFORM: + case WLAN_SP_MGK_ACK: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + goto invalid; + break; + } + break; case WLAN_CATEGORY_MESH_ACTION: if (!ieee80211_vif_is_mesh(&sdata->vif)) break; - goto queue; - case WLAN_CATEGORY_MESH_PATH_SEL: - if (!mesh_path_sel_is_hwmp(sdata)) + if (mesh_action_is_path_sel(mgmt) && + (!mesh_path_sel_is_hwmp(sdata))) break; goto queue; } @@ -2408,6 +2560,10 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, goto out_free_skb; rx->flags |= IEEE80211_RX_CMNTR; + /* If there are no cooked monitor interfaces, just free the SKB */ + if (!local->cooked_mntrs) + goto out_free_skb; + if (skb_headroom(skb) < sizeof(*rthdr) && pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) goto out_free_skb; @@ -2534,17 +2690,16 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) CALL_RXH(ieee80211_rx_h_decrypt) CALL_RXH(ieee80211_rx_h_check_more_data) + CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll) CALL_RXH(ieee80211_rx_h_sta_process) CALL_RXH(ieee80211_rx_h_defragment) - CALL_RXH(ieee80211_rx_h_ps_poll) CALL_RXH(ieee80211_rx_h_michael_mic_verify) /* must be after MMIC verify so header is counted in MPDU mic */ - CALL_RXH(ieee80211_rx_h_remove_qos_control) - CALL_RXH(ieee80211_rx_h_amsdu) #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&rx->sdata->vif)) CALL_RXH(ieee80211_rx_h_mesh_fwding); #endif + CALL_RXH(ieee80211_rx_h_amsdu) CALL_RXH(ieee80211_rx_h_data) CALL_RXH(ieee80211_rx_h_ctrl); CALL_RXH(ieee80211_rx_h_mgmt_check) @@ -2663,8 +2818,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, rate_idx = 0; /* TODO: HT rates */ else rate_idx = status->rate_idx; - rx->sta = ieee80211_ibss_add_sta(sdata, bssid, - hdr->addr2, BIT(rate_idx), GFP_ATOMIC); + ieee80211_ibss_rx_no_sta(sdata, bssid, hdr->addr2, + BIT(rate_idx)); } break; case NL80211_IFTYPE_MESH_POINT: @@ -2685,6 +2840,15 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, return 0; } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { + /* + * Accept public action frames even when the + * BSSID doesn't match, this is used for P2P + * and location updates. Note that mac80211 + * itself never looks at these frames. + */ + if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) && + ieee80211_is_public_action(hdr, skb->len)) + return 1; if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) && !ieee80211_is_beacon(hdr->frame_control)) return 0; @@ -2791,7 +2955,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (ieee80211_is_data(fc)) { prev_sta = NULL; - for_each_sta_info(local, hdr->addr2, sta, tmp) { + for_each_sta_info_rx(local, hdr->addr2, sta, tmp) { if (!prev_sta) { prev_sta = sta; continue; @@ -2835,7 +2999,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, continue; } - rx.sta = sta_info_get_bss(prev, hdr->addr2); + rx.sta = sta_info_get_bss_rx(prev, hdr->addr2); rx.sdata = prev; ieee80211_prepare_and_rx_handle(&rx, skb, false); @@ -2843,7 +3007,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, } if (prev) { - rx.sta = sta_info_get_bss(prev, hdr->addr2); + rx.sta = sta_info_get_bss_rx(prev, hdr->addr2); rx.sdata = prev; if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 6f09eca01112..9270771702fe 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -14,9 +14,10 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> -#include <linux/pm_qos_params.h> +#include <linux/pm_qos.h> #include <net/sch_generic.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" @@ -105,7 +106,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, /* save the ERP value so that it is available at association time */ if (elems->erp_info && elems->erp_info_len >= 1) { bss->erp_value = elems->erp_info[0]; - bss->has_erp_value = 1; + bss->has_erp_value = true; } if (elems->tim) { @@ -212,12 +213,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) if (bss) ieee80211_rx_bss_put(sdata->local, bss); - /* If we are on-operating-channel, and this packet is for the - * current channel, pass the pkt on up the stack so that - * the rest of the stack can make use of it. - */ - if (ieee80211_cfg_on_oper_channel(sdata->local) - && (channel == sdata->local->oper_channel)) + if (channel == sdata->local->oper_channel) return RX_CONTINUE; dev_kfree_skb(skb); @@ -254,6 +250,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) req->ie, req->ie_len, band, req->rates[band], 0); local->hw_scan_req->ie_len = ielen; + local->hw_scan_req->no_cck = req->no_cck; return true; } @@ -262,8 +259,6 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, bool was_hw_scan) { struct ieee80211_local *local = hw_to_local(hw); - bool on_oper_chan; - bool enable_beacons = false; lockdep_assert_held(&local->mtx); @@ -296,25 +291,13 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, local->scanning = 0; local->scan_channel = NULL; - on_oper_chan = ieee80211_cfg_on_oper_channel(local); - - if (was_hw_scan || !on_oper_chan) - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - else - /* Set power back to normal operating levels. */ - ieee80211_hw_config(local, 0); + /* Set power back to normal operating levels. */ + ieee80211_hw_config(local, 0); if (!was_hw_scan) { - bool on_oper_chan2; ieee80211_configure_filter(local); drv_sw_scan_complete(local); - on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); - /* We should always be on-channel at this point. */ - WARN_ON(!on_oper_chan2); - if (on_oper_chan2 && (on_oper_chan != on_oper_chan2)) - enable_beacons = true; - - ieee80211_offchannel_return(local, enable_beacons, true); + ieee80211_offchannel_return(local, true); } ieee80211_recalc_idle(local); @@ -359,11 +342,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; - /* We always want to use off-channel PS, even if we - * are not really leaving oper-channel. Don't - * tell the AP though, as long as we are on-channel. - */ - ieee80211_offchannel_enable_all_ps(local, false); + ieee80211_offchannel_stop_vifs(local, true); ieee80211_configure_filter(local); @@ -371,8 +350,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) ieee80211_hw_config(local, 0); ieee80211_queue_delayed_work(&local->hw, - &local->scan_work, - IEEE80211_CHANNEL_TIME); + &local->scan_work, 0); return 0; } @@ -508,96 +486,39 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, next_chan = local->scan_req->channels[local->scan_channel_idx]; - if (ieee80211_cfg_on_oper_channel(local)) { - /* We're currently on operating channel. */ - if (next_chan == local->oper_channel) - /* We don't need to move off of operating channel. */ - local->next_scan_state = SCAN_SET_CHANNEL; - else - /* - * We do need to leave operating channel, as next - * scan is somewhere else. - */ - local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL; - } else { - /* - * we're currently scanning a different channel, let's - * see if we can scan another channel without interfering - * with the current traffic situation. - * - * Since we don't know if the AP has pending frames for us - * we can only check for our tx queues and use the current - * pm_qos requirements for rx. Hence, if no tx traffic occurs - * at all we will scan as many channels in a row as the pm_qos - * latency allows us to. Additionally we also check for the - * currently negotiated listen interval to prevent losing - * frames unnecessarily. - * - * Otherwise switch back to the operating channel. - */ - - bad_latency = time_after(jiffies + - ieee80211_scan_get_channel_time(next_chan), - local->leave_oper_channel_time + - usecs_to_jiffies(pm_qos_request(PM_QOS_NETWORK_LATENCY))); - - listen_int_exceeded = time_after(jiffies + - ieee80211_scan_get_channel_time(next_chan), - local->leave_oper_channel_time + - usecs_to_jiffies(min_beacon_int * 1024) * - local->hw.conf.listen_interval); - - if (associated && ( !tx_empty || bad_latency || - listen_int_exceeded)) - local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; - else - local->next_scan_state = SCAN_SET_CHANNEL; - } - - *next_delay = 0; -} - -static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local, - unsigned long *next_delay) -{ - /* PS will already be in off-channel mode, - * we do that once at the beginning of scanning. - */ - ieee80211_offchannel_stop_vifs(local, false); - /* - * What if the nullfunc frames didn't arrive? + * we're currently scanning a different channel, let's + * see if we can scan another channel without interfering + * with the current traffic situation. + * + * Since we don't know if the AP has pending frames for us + * we can only check for our tx queues and use the current + * pm_qos requirements for rx. Hence, if no tx traffic occurs + * at all we will scan as many channels in a row as the pm_qos + * latency allows us to. Additionally we also check for the + * currently negotiated listen interval to prevent losing + * frames unnecessarily. + * + * Otherwise switch back to the operating channel. */ - drv_flush(local, false); - if (local->ops->flush) - *next_delay = 0; - else - *next_delay = HZ / 10; - /* remember when we left the operating channel */ - local->leave_oper_channel_time = jiffies; + bad_latency = time_after(jiffies + + ieee80211_scan_get_channel_time(next_chan), + local->leave_oper_channel_time + + usecs_to_jiffies(pm_qos_request(PM_QOS_NETWORK_LATENCY))); - /* advance to the next channel to be scanned */ - local->next_scan_state = SCAN_SET_CHANNEL; -} - -static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *local, - unsigned long *next_delay) -{ - /* switch back to the operating channel */ - local->scan_channel = NULL; - if (!ieee80211_cfg_on_oper_channel(local)) - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + listen_int_exceeded = time_after(jiffies + + ieee80211_scan_get_channel_time(next_chan), + local->leave_oper_channel_time + + usecs_to_jiffies(min_beacon_int * 1024) * + local->hw.conf.listen_interval); - /* - * Re-enable vifs and beaconing. Leave PS - * in off-channel state..will put that back - * on-channel at the end of scanning. - */ - ieee80211_offchannel_return(local, true, false); + if (associated && (!tx_empty || bad_latency || listen_int_exceeded)) + local->next_scan_state = SCAN_SUSPEND; + else + local->next_scan_state = SCAN_SET_CHANNEL; - *next_delay = HZ / 5; - local->next_scan_state = SCAN_DECISION; + *next_delay = 0; } static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, @@ -611,10 +532,8 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, local->scan_channel = chan; - /* Only call hw-config if we really need to change channels. */ - if (chan != local->hw.conf.channel) - if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) - skip = 1; + if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) + skip = 1; /* advance state machine to next channel/band */ local->scan_channel_idx++; @@ -660,7 +579,8 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, local->scan_req->ssids[i].ssid, local->scan_req->ssids[i].ssid_len, local->scan_req->ie, local->scan_req->ie_len, - local->scan_req->rates[band], false); + local->scan_req->rates[band], false, + local->scan_req->no_cck); /* * After sending probe requests, wait for probe responses @@ -670,6 +590,44 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, local->next_scan_state = SCAN_DECISION; } +static void ieee80211_scan_state_suspend(struct ieee80211_local *local, + unsigned long *next_delay) +{ + /* switch back to the operating channel */ + local->scan_channel = NULL; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + + /* + * Re-enable vifs and beaconing. Leave PS + * in off-channel state..will put that back + * on-channel at the end of scanning. + */ + ieee80211_offchannel_return(local, false); + + *next_delay = HZ / 5; + /* afterwards, resume scan & go to next channel */ + local->next_scan_state = SCAN_RESUME; +} + +static void ieee80211_scan_state_resume(struct ieee80211_local *local, + unsigned long *next_delay) +{ + /* PS already is in off-channel mode */ + ieee80211_offchannel_stop_vifs(local, false); + + if (local->ops->flush) { + drv_flush(local, false); + *next_delay = 0; + } else + *next_delay = HZ / 10; + + /* remember when we left the operating channel */ + local->leave_oper_channel_time = jiffies; + + /* advance to the next channel to be scanned */ + local->next_scan_state = SCAN_SET_CHANNEL; +} + void ieee80211_scan_work(struct work_struct *work) { struct ieee80211_local *local = @@ -740,11 +698,11 @@ void ieee80211_scan_work(struct work_struct *work) case SCAN_SEND_PROBE: ieee80211_scan_state_send_probe(local, &next_delay); break; - case SCAN_LEAVE_OPER_CHANNEL: - ieee80211_scan_state_leave_oper_channel(local, &next_delay); + case SCAN_SUSPEND: + ieee80211_scan_state_suspend(local, &next_delay); break; - case SCAN_ENTER_OPER_CHANNEL: - ieee80211_scan_state_enter_oper_channel(local, &next_delay); + case SCAN_RESUME: + ieee80211_scan_state_resume(local, &next_delay); break; } } while (next_delay == 0); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 7733f66ee2c4..578eea3fc04d 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -32,12 +32,8 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + sizeof(struct ieee80211_msrment_ie)); - - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer for " - "measurement report frame\n", sdata->name); + if (!skb) return; - } skb_reserve(skb, local->hw.extra_tx_headroom); msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 21070e9bc8d0..3c428d4839c7 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -24,6 +24,7 @@ #include "sta_info.h" #include "debugfs_sta.h" #include "mesh.h" +#include "wme.h" /** * DOC: STA information lifetime rules @@ -61,14 +62,14 @@ * freed before they are done using it. */ -/* Caller must hold local->sta_lock */ +/* Caller must hold local->sta_mtx */ static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { struct sta_info *s; s = rcu_dereference_protected(local->sta_hash[STA_HASH(sta->sta.addr)], - lockdep_is_held(&local->sta_lock)); + lockdep_is_held(&local->sta_mtx)); if (!s) return -ENOENT; if (s == sta) { @@ -80,7 +81,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, while (rcu_access_pointer(s->hnext) && rcu_access_pointer(s->hnext) != sta) s = rcu_dereference_protected(s->hnext, - lockdep_is_held(&local->sta_lock)); + lockdep_is_held(&local->sta_mtx)); if (rcu_access_pointer(s->hnext)) { rcu_assign_pointer(s->hnext, sta->hnext); return 0; @@ -97,14 +98,31 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); + while (sta) { + if (sta->sdata == sdata && !sta->dummy && + memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) + break; + sta = rcu_dereference_check(sta->hnext, + lockdep_is_held(&local->sta_mtx)); + } + return sta; +} + +/* get a station info entry even if it is a dummy station*/ +struct sta_info *sta_info_get_rx(struct ieee80211_sub_if_data *sdata, + const u8 *addr) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], lockdep_is_held(&local->sta_mtx)); while (sta) { if (sta->sdata == sdata && memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference_check(sta->hnext, - lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); } return sta; @@ -121,7 +139,30 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); + while (sta) { + if ((sta->sdata == sdata || + (sta->sdata->bss && sta->sdata->bss == sdata->bss)) && + !sta->dummy && + memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) + break; + sta = rcu_dereference_check(sta->hnext, + lockdep_is_held(&local->sta_mtx)); + } + return sta; +} + +/* + * Get sta info either from the specified interface + * or from one of its vlans (including dummy stations) + */ +struct sta_info *sta_info_get_bss_rx(struct ieee80211_sub_if_data *sdata, + const u8 *addr) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], lockdep_is_held(&local->sta_mtx)); while (sta) { if ((sta->sdata == sdata || @@ -129,7 +170,6 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference_check(sta->hnext, - lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); } return sta; @@ -156,16 +196,17 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, } /** - * __sta_info_free - internal STA free helper + * sta_info_free - free STA * * @local: pointer to the global information * @sta: STA info to free * * This function must undo everything done by sta_info_alloc() - * that may happen before sta_info_insert(). + * that may happen before sta_info_insert(). It may only be + * called when sta_info_insert() has not been attempted (and + * if that fails, the station is freed anyway.) */ -static void __sta_info_free(struct ieee80211_local *local, - struct sta_info *sta) +void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { if (sta->rate_ctrl) { rate_control_free_sta(sta); @@ -179,10 +220,11 @@ static void __sta_info_free(struct ieee80211_local *local, kfree(sta); } -/* Caller must hold local->sta_lock */ +/* Caller must hold local->sta_mtx */ static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { + lockdep_assert_held(&local->sta_mtx); sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } @@ -196,13 +238,22 @@ static void sta_unblock(struct work_struct *wk) if (sta->dead) return; - if (!test_sta_flags(sta, WLAN_STA_PS_STA)) + if (!test_sta_flag(sta, WLAN_STA_PS_STA)) ieee80211_sta_ps_deliver_wakeup(sta); - else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) { - clear_sta_flags(sta, WLAN_STA_PS_DRIVER); + else if (test_and_clear_sta_flag(sta, WLAN_STA_PSPOLL)) { + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + + local_bh_disable(); ieee80211_sta_ps_deliver_poll_response(sta); + local_bh_enable(); + } else if (test_and_clear_sta_flag(sta, WLAN_STA_UAPSD)) { + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + + local_bh_disable(); + ieee80211_sta_ps_deliver_uapsd(sta); + local_bh_enable(); } else - clear_sta_flags(sta, WLAN_STA_PS_DRIVER); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); } static int sta_prepare_rate_control(struct ieee80211_local *local, @@ -223,7 +274,7 @@ static int sta_prepare_rate_control(struct ieee80211_local *local, } struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, - u8 *addr, gfp_t gfp) + const u8 *addr, gfp_t gfp) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; @@ -235,7 +286,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return NULL; spin_lock_init(&sta->lock); - spin_lock_init(&sta->flaglock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -262,8 +312,10 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, */ sta->timer_to_tid[i] = i; } - skb_queue_head_init(&sta->ps_tx_buf); - skb_queue_head_init(&sta->tx_filtered); + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + skb_queue_head_init(&sta->ps_tx_buf[i]); + skb_queue_head_init(&sta->tx_filtered[i]); + } for (i = 0; i < NUM_RX_DATA_QUEUES; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); @@ -280,206 +332,137 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return sta; } -static int sta_info_finish_insert(struct sta_info *sta, bool async) +static int sta_info_insert_check(struct sta_info *sta) { - struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - struct station_info sinfo; - unsigned long flags; - int err = 0; - lockdep_assert_held(&local->sta_mtx); - - /* notify driver */ - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - sdata = container_of(sdata->bss, - struct ieee80211_sub_if_data, - u.ap); - err = drv_sta_add(local, sdata, &sta->sta); - if (err) { - if (!async) - return err; - printk(KERN_DEBUG "%s: failed to add IBSS STA %pM to driver (%d)" - " - keeping it anyway.\n", - sdata->name, sta->sta.addr, err); - } else { - sta->uploaded = true; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (async) - wiphy_debug(local->hw.wiphy, - "Finished adding IBSS STA %pM\n", - sta->sta.addr); -#endif - } - - sdata = sta->sdata; - - if (!async) { - local->num_sta++; - local->sta_generation++; - smp_mb(); - - /* make the station visible */ - spin_lock_irqsave(&local->sta_lock, flags); - sta_info_hash_add(local, sta); - spin_unlock_irqrestore(&local->sta_lock, flags); - } - - list_add(&sta->list, &local->sta_list); - - ieee80211_sta_debugfs_add(sta); - rate_control_add_sta_debugfs(sta); - - sinfo.filled = 0; - sinfo.generation = local->sta_generation; - cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + /* + * Can't be a WARN_ON because it can be triggered through a race: + * something inserts a STA (on one CPU) without holding the RTNL + * and another CPU turns off the net device. + */ + if (unlikely(!ieee80211_sdata_running(sdata))) + return -ENETDOWN; + if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->vif.addr) == 0 || + is_multicast_ether_addr(sta->sta.addr))) + return -EINVAL; return 0; } -static void sta_info_finish_pending(struct ieee80211_local *local) -{ - struct sta_info *sta; - unsigned long flags; - - spin_lock_irqsave(&local->sta_lock, flags); - while (!list_empty(&local->sta_pending_list)) { - sta = list_first_entry(&local->sta_pending_list, - struct sta_info, list); - list_del(&sta->list); - spin_unlock_irqrestore(&local->sta_lock, flags); - - sta_info_finish_insert(sta, true); - - spin_lock_irqsave(&local->sta_lock, flags); - } - spin_unlock_irqrestore(&local->sta_lock, flags); -} - -static void sta_info_finish_work(struct work_struct *work) -{ - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, sta_finish_work); - - mutex_lock(&local->sta_mtx); - sta_info_finish_pending(local); - mutex_unlock(&local->sta_mtx); -} - -int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) +/* + * should be called with sta_mtx locked + * this function replaces the mutex lock + * with a RCU lock + */ +static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - unsigned long flags; + struct sta_info *exist_sta; + bool dummy_reinsert = false; int err = 0; - /* - * Can't be a WARN_ON because it can be triggered through a race: - * something inserts a STA (on one CPU) without holding the RTNL - * and another CPU turns off the net device. - */ - if (unlikely(!ieee80211_sdata_running(sdata))) { - err = -ENETDOWN; - rcu_read_lock(); - goto out_free; - } - - if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->vif.addr) == 0 || - is_multicast_ether_addr(sta->sta.addr))) { - err = -EINVAL; - rcu_read_lock(); - goto out_free; - } + lockdep_assert_held(&local->sta_mtx); /* - * In ad-hoc mode, we sometimes need to insert stations - * from tasklet context from the RX path. To avoid races, - * always do so in that case -- see the comment below. + * check if STA exists already. + * only accept a scenario of a second call to sta_info_insert_finish + * with a dummy station entry that was inserted earlier + * in that case - assume that the dummy station flag should + * be removed. */ - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - spin_lock_irqsave(&local->sta_lock, flags); - /* check if STA exists already */ - if (sta_info_get_bss(sdata, sta->sta.addr)) { - spin_unlock_irqrestore(&local->sta_lock, flags); - rcu_read_lock(); + exist_sta = sta_info_get_bss_rx(sdata, sta->sta.addr); + if (exist_sta) { + if (exist_sta == sta && sta->dummy) { + dummy_reinsert = true; + } else { err = -EEXIST; - goto out_free; + goto out_err; } + } + + if (!sta->dummy || dummy_reinsert) { + /* notify driver */ + err = drv_sta_add(local, sdata, &sta->sta); + if (err) { + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + goto out_err; + printk(KERN_DEBUG "%s: failed to add IBSS STA %pM to " + "driver (%d) - keeping it anyway.\n", + sdata->name, sta->sta.addr, err); + } else + sta->uploaded = true; + } + if (!dummy_reinsert) { local->num_sta++; local->sta_generation++; smp_mb(); - sta_info_hash_add(local, sta); - list_add_tail(&sta->list, &local->sta_pending_list); + /* make the station visible */ + sta_info_hash_add(local, sta); - rcu_read_lock(); - spin_unlock_irqrestore(&local->sta_lock, flags); + list_add(&sta->list, &local->sta_list); + } else { + sta->dummy = false; + } -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Added IBSS STA %pM\n", - sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + if (!sta->dummy) { + struct station_info sinfo; - ieee80211_queue_work(&local->hw, &local->sta_finish_work); + ieee80211_sta_debugfs_add(sta); + rate_control_add_sta_debugfs(sta); - return 0; + memset(&sinfo, 0, sizeof(sinfo)); + sinfo.filled = 0; + sinfo.generation = local->sta_generation; + cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); } - /* - * On first glance, this will look racy, because the code - * below this point, which inserts a station with sleeping, - * unlocks the sta_lock between checking existence in the - * hash table and inserting into it. - * - * However, it is not racy against itself because it keeps - * the mutex locked. It still seems to race against the - * above code that atomically inserts the station... That, - * however, is not true because the above code can only - * be invoked for IBSS interfaces, and the below code will - * not be -- and the two do not race against each other as - * the hash table also keys off the interface. - */ +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + wiphy_debug(local->hw.wiphy, "Inserted %sSTA %pM\n", + sta->dummy ? "dummy " : "", sta->sta.addr); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - might_sleep(); + /* move reference to rcu-protected */ + rcu_read_lock(); + mutex_unlock(&local->sta_mtx); - mutex_lock(&local->sta_mtx); + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_accept_plinks_update(sdata); - spin_lock_irqsave(&local->sta_lock, flags); - /* check if STA exists already */ - if (sta_info_get_bss(sdata, sta->sta.addr)) { - spin_unlock_irqrestore(&local->sta_lock, flags); - mutex_unlock(&local->sta_mtx); - rcu_read_lock(); - err = -EEXIST; - goto out_free; - } + return 0; + out_err: + mutex_unlock(&local->sta_mtx); + rcu_read_lock(); + return err; +} - spin_unlock_irqrestore(&local->sta_lock, flags); +int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) +{ + struct ieee80211_local *local = sta->local; + int err = 0; + + might_sleep(); - err = sta_info_finish_insert(sta, false); + err = sta_info_insert_check(sta); if (err) { - mutex_unlock(&local->sta_mtx); rcu_read_lock(); goto out_free; } -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Inserted STA %pM\n", sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - - /* move reference to rcu-protected */ - rcu_read_lock(); - mutex_unlock(&local->sta_mtx); + mutex_lock(&local->sta_mtx); - if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_accept_plinks_update(sdata); + err = sta_info_insert_finish(sta); + if (err) + goto out_free; return 0; out_free: BUG_ON(!err); - __sta_info_free(local, sta); + sta_info_free(local, sta); return err; } @@ -492,6 +475,25 @@ int sta_info_insert(struct sta_info *sta) return err; } +/* Caller must hold sta->local->sta_mtx */ +int sta_info_reinsert(struct sta_info *sta) +{ + struct ieee80211_local *local = sta->local; + int err = 0; + + err = sta_info_insert_check(sta); + if (err) { + mutex_unlock(&local->sta_mtx); + return err; + } + + might_sleep(); + + err = sta_info_insert_finish(sta); + rcu_read_unlock(); + return err; +} + static inline void __bss_tim_set(struct ieee80211_if_ap *bss, u16 aid) { /* @@ -510,64 +512,93 @@ static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, u16 aid) bss->tim[aid / 8] &= ~(1 << (aid % 8)); } -static void __sta_info_set_tim_bit(struct ieee80211_if_ap *bss, - struct sta_info *sta) +static unsigned long ieee80211_tids_for_ac(int ac) { - BUG_ON(!bss); - - __bss_tim_set(bss, sta->sta.aid); - - if (sta->local->ops->set_tim) { - sta->local->tim_in_locked_section = true; - drv_set_tim(sta->local, &sta->sta, true); - sta->local->tim_in_locked_section = false; + /* If we ever support TIDs > 7, this obviously needs to be adjusted */ + switch (ac) { + case IEEE80211_AC_VO: + return BIT(6) | BIT(7); + case IEEE80211_AC_VI: + return BIT(4) | BIT(5); + case IEEE80211_AC_BE: + return BIT(0) | BIT(3); + case IEEE80211_AC_BK: + return BIT(1) | BIT(2); + default: + WARN_ON(1); + return 0; } } -void sta_info_set_tim_bit(struct sta_info *sta) +void sta_info_recalc_tim(struct sta_info *sta) { + struct ieee80211_local *local = sta->local; + struct ieee80211_if_ap *bss = sta->sdata->bss; unsigned long flags; + bool indicate_tim = false; + u8 ignore_for_tim = sta->sta.uapsd_queues; + int ac; - BUG_ON(!sta->sdata->bss); + if (WARN_ON_ONCE(!sta->sdata->bss)) + return; - spin_lock_irqsave(&sta->local->sta_lock, flags); - __sta_info_set_tim_bit(sta->sdata->bss, sta); - spin_unlock_irqrestore(&sta->local->sta_lock, flags); -} + /* No need to do anything if the driver does all */ + if (local->hw.flags & IEEE80211_HW_AP_LINK_PS) + return; -static void __sta_info_clear_tim_bit(struct ieee80211_if_ap *bss, - struct sta_info *sta) -{ - BUG_ON(!bss); + if (sta->dead) + goto done; + + /* + * If all ACs are delivery-enabled then we should build + * the TIM bit for all ACs anyway; if only some are then + * we ignore those and build the TIM bit using only the + * non-enabled ones. + */ + if (ignore_for_tim == BIT(IEEE80211_NUM_ACS) - 1) + ignore_for_tim = 0; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + unsigned long tids; + + if (ignore_for_tim & BIT(ac)) + continue; + + indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac]); + if (indicate_tim) + break; - __bss_tim_clear(bss, sta->sta.aid); + tids = ieee80211_tids_for_ac(ac); - if (sta->local->ops->set_tim) { - sta->local->tim_in_locked_section = true; - drv_set_tim(sta->local, &sta->sta, false); - sta->local->tim_in_locked_section = false; + indicate_tim |= + sta->driver_buffered_tids & tids; } -} -void sta_info_clear_tim_bit(struct sta_info *sta) -{ - unsigned long flags; + done: + spin_lock_irqsave(&local->tim_lock, flags); + + if (indicate_tim) + __bss_tim_set(bss, sta->sta.aid); + else + __bss_tim_clear(bss, sta->sta.aid); - BUG_ON(!sta->sdata->bss); + if (local->ops->set_tim) { + local->tim_in_locked_section = true; + drv_set_tim(local, &sta->sta, indicate_tim); + local->tim_in_locked_section = false; + } - spin_lock_irqsave(&sta->local->sta_lock, flags); - __sta_info_clear_tim_bit(sta->sdata->bss, sta); - spin_unlock_irqrestore(&sta->local->sta_lock, flags); + spin_unlock_irqrestore(&local->tim_lock, flags); } -static int sta_info_buffer_expired(struct sta_info *sta, - struct sk_buff *skb) +static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_tx_info *info; int timeout; if (!skb) - return 0; + return false; info = IEEE80211_SKB_CB(skb); @@ -581,24 +612,59 @@ static int sta_info_buffer_expired(struct sta_info *sta, } -static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, - struct sta_info *sta) +static bool sta_info_cleanup_expire_buffered_ac(struct ieee80211_local *local, + struct sta_info *sta, int ac) { unsigned long flags; struct sk_buff *skb; - if (skb_queue_empty(&sta->ps_tx_buf)) - return false; + /* + * First check for frames that should expire on the filtered + * queue. Frames here were rejected by the driver and are on + * a separate queue to avoid reordering with normal PS-buffered + * frames. They also aren't accounted for right now in the + * total_ps_buffered counter. + */ + for (;;) { + spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags); + skb = skb_peek(&sta->tx_filtered[ac]); + if (sta_info_buffer_expired(sta, skb)) + skb = __skb_dequeue(&sta->tx_filtered[ac]); + else + skb = NULL; + spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags); + /* + * Frames are queued in order, so if this one + * hasn't expired yet we can stop testing. If + * we actually reached the end of the queue we + * also need to stop, of course. + */ + if (!skb) + break; + dev_kfree_skb(skb); + } + + /* + * Now also check the normal PS-buffered queue, this will + * only find something if the filtered queue was emptied + * since the filtered frames are all before the normal PS + * buffered frames. + */ for (;;) { - spin_lock_irqsave(&sta->ps_tx_buf.lock, flags); - skb = skb_peek(&sta->ps_tx_buf); + spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags); + skb = skb_peek(&sta->ps_tx_buf[ac]); if (sta_info_buffer_expired(sta, skb)) - skb = __skb_dequeue(&sta->ps_tx_buf); + skb = __skb_dequeue(&sta->ps_tx_buf[ac]); else skb = NULL; - spin_unlock_irqrestore(&sta->ps_tx_buf.lock, flags); + spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags); + /* + * frames are queued in order, so if this one + * hasn't expired yet (or we reached the end of + * the queue) we can stop testing + */ if (!skb) break; @@ -608,22 +674,47 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, sta->sta.addr); #endif dev_kfree_skb(skb); - - if (skb_queue_empty(&sta->ps_tx_buf) && - !test_sta_flags(sta, WLAN_STA_PS_DRIVER_BUF)) - sta_info_clear_tim_bit(sta); } - return true; + /* + * Finally, recalculate the TIM bit for this station -- it might + * now be clear because the station was too slow to retrieve its + * frames. + */ + sta_info_recalc_tim(sta); + + /* + * Return whether there are any frames still buffered, this is + * used to check whether the cleanup timer still needs to run, + * if there are no frames we don't need to rearm the timer. + */ + return !(skb_queue_empty(&sta->ps_tx_buf[ac]) && + skb_queue_empty(&sta->tx_filtered[ac])); +} + +static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, + struct sta_info *sta) +{ + bool have_buffered = false; + int ac; + + /* This is only necessary for stations on BSS interfaces */ + if (!sta->sdata->bss) + return false; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + have_buffered |= + sta_info_cleanup_expire_buffered_ac(local, sta, ac); + + return have_buffered; } static int __must_check __sta_info_destroy(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - struct sk_buff *skb; - unsigned long flags; - int ret, i; + int ret, i, ac; + struct tid_ampdu_tx *tid_tx; might_sleep(); @@ -639,18 +730,15 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) * sessions -- block that to make sure the tear-down * will be sufficient. */ - set_sta_flags(sta, WLAN_STA_BLOCK_BA); + set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, true); - spin_lock_irqsave(&local->sta_lock, flags); ret = sta_info_hash_del(local, sta); - /* this might still be the pending list ... which is fine */ - if (!ret) - list_del(&sta->list); - spin_unlock_irqrestore(&local->sta_lock, flags); if (ret) return ret; + list_del(&sta->list); + mutex_lock(&local->key_mtx); for (i = 0; i < NUM_DEFAULT_KEYS; i++) __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i])); @@ -660,19 +748,25 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) sta->dead = true; - if (test_and_clear_sta_flags(sta, - WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) { + if (test_sta_flag(sta, WLAN_STA_PS_STA) || + test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { BUG_ON(!sdata->bss); + clear_sta_flag(sta, WLAN_STA_PS_STA); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + atomic_dec(&sdata->bss->num_sta_ps); - sta_info_clear_tim_bit(sta); + sta_info_recalc_tim(sta); } local->num_sta--; local->sta_generation++; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); + + while (sta->sta_state > IEEE80211_STA_NONE) + sta_info_move_state(sta, sta->sta_state - 1); if (sta->uploaded) { if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -691,6 +785,12 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) */ synchronize_rcu(); + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); + __skb_queue_purge(&sta->ps_tx_buf[ac]); + __skb_queue_purge(&sta->tx_filtered[ac]); + } + #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); @@ -713,15 +813,36 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) } #endif - while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - local->total_ps_buffered--; - dev_kfree_skb_any(skb); + /* There could be some memory leaks because of ampdu tx pending queue + * not being freed before destroying the station info. + * + * Make sure that such queues are purged before freeing the station + * info. + * TODO: We have to somehow postpone the full destruction + * until the aggregation stop completes. Refer + * http://thread.gmane.org/gmane.linux.kernel.wireless.general/81936 + */ + + mutex_lock(&sta->ampdu_mlme.mtx); + + for (i = 0; i < STA_TID_NUM; i++) { + tid_tx = rcu_dereference_protected_tid_tx(sta, i); + if (!tid_tx) + continue; + if (skb_queue_len(&tid_tx->pending)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + wiphy_debug(local->hw.wiphy, "TX A-MPDU purging %d " + "packets for tid=%d\n", + skb_queue_len(&tid_tx->pending), i); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + __skb_queue_purge(&tid_tx->pending); + } + kfree_rcu(tid_tx, rcu_head); } - while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) - dev_kfree_skb_any(skb); + mutex_unlock(&sta->ampdu_mlme.mtx); - __sta_info_free(local, sta); + sta_info_free(local, sta); return 0; } @@ -732,7 +853,7 @@ int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) int ret; mutex_lock(&sdata->local->sta_mtx); - sta = sta_info_get(sdata, addr); + sta = sta_info_get_rx(sdata, addr); ret = __sta_info_destroy(sta); mutex_unlock(&sdata->local->sta_mtx); @@ -746,7 +867,7 @@ int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, int ret; mutex_lock(&sdata->local->sta_mtx); - sta = sta_info_get_bss(sdata, addr); + sta = sta_info_get_bss_rx(sdata, addr); ret = __sta_info_destroy(sta); mutex_unlock(&sdata->local->sta_mtx); @@ -777,11 +898,9 @@ static void sta_info_cleanup(unsigned long data) void sta_info_init(struct ieee80211_local *local) { - spin_lock_init(&local->sta_lock); + spin_lock_init(&local->tim_lock); mutex_init(&local->sta_mtx); INIT_LIST_HEAD(&local->sta_list); - INIT_LIST_HEAD(&local->sta_pending_list); - INIT_WORK(&local->sta_finish_work, sta_info_finish_work); setup_timer(&local->sta_cleanup, sta_info_cleanup, (unsigned long)local); @@ -810,9 +929,6 @@ int sta_info_flush(struct ieee80211_local *local, might_sleep(); mutex_lock(&local->sta_mtx); - - sta_info_finish_pending(local); - list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { if (!sdata || sdata == sta->sdata) WARN_ON(__sta_info_destroy(sta)); @@ -829,7 +945,11 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, *tmp; mutex_lock(&local->sta_mtx); - list_for_each_entry_safe(sta, tmp, &local->sta_list, list) + + list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { + if (sdata != sta->sdata) + continue; + if (time_after(jiffies, sta->last_rx + exp_time)) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: expiring inactive STA %pM\n", @@ -837,6 +957,8 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, #endif WARN_ON(__sta_info_destroy(sta)); } + } + mutex_unlock(&local->sta_mtx); } @@ -886,7 +1008,8 @@ static void clear_sta_ps_flags(void *_sta) { struct sta_info *sta = _sta; - clear_sta_flags(sta, WLAN_STA_PS_DRIVER | WLAN_STA_PS_STA); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + clear_sta_flag(sta, WLAN_STA_PS_STA); } /* powersave support code */ @@ -894,88 +1017,341 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - int sent, buffered; + struct sk_buff_head pending; + int filtered = 0, buffered = 0, ac; + + clear_sta_flag(sta, WLAN_STA_SP); + + BUILD_BUG_ON(BITS_TO_LONGS(STA_TID_NUM) > 1); + sta->driver_buffered_tids = 0; - clear_sta_flags(sta, WLAN_STA_PS_DRIVER_BUF); if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); - if (!skb_queue_empty(&sta->ps_tx_buf)) - sta_info_clear_tim_bit(sta); + skb_queue_head_init(&pending); /* Send all buffered frames to the station */ - sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); - buffered = ieee80211_add_pending_skbs_fn(local, &sta->ps_tx_buf, - clear_sta_ps_flags, sta); - sent += buffered; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + int count = skb_queue_len(&pending), tmp; + + skb_queue_splice_tail_init(&sta->tx_filtered[ac], &pending); + tmp = skb_queue_len(&pending); + filtered += tmp - count; + count = tmp; + + skb_queue_splice_tail_init(&sta->ps_tx_buf[ac], &pending); + tmp = skb_queue_len(&pending); + buffered += tmp - count; + } + + ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta); + local->total_ps_buffered -= buffered; + sta_info_recalc_tim(sta); + #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d sending %d filtered/%d PS frames " "since STA not sleeping anymore\n", sdata->name, - sta->sta.addr, sta->sta.aid, sent - buffered, buffered); + sta->sta.addr, sta->sta.aid, filtered, buffered); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } -void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) +static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, int tid, + enum ieee80211_frame_release_type reason) { - struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; - int no_pending_pkts; + int size = sizeof(*nullfunc); + __le16 fc; + bool qos = test_sta_flag(sta, WLAN_STA_WME); + struct ieee80211_tx_info *info; - skb = skb_dequeue(&sta->tx_filtered); - if (!skb) { - skb = skb_dequeue(&sta->ps_tx_buf); - if (skb) - local->total_ps_buffered--; + if (qos) { + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_QOS_NULLFUNC | + IEEE80211_FCTL_FROMDS); + } else { + size -= 2; + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_FROMDS); + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); + if (!skb) + return; + + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (void *) skb_put(skb, size); + nullfunc->frame_control = fc; + nullfunc->duration_id = 0; + memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); + memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + + skb->priority = tid; + skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); + if (qos) { + nullfunc->qos_ctrl = cpu_to_le16(tid); + + if (reason == IEEE80211_FRAME_RELEASE_UAPSD) + nullfunc->qos_ctrl |= + cpu_to_le16(IEEE80211_QOS_CTL_EOSP); } - no_pending_pkts = skb_queue_empty(&sta->tx_filtered) && - skb_queue_empty(&sta->ps_tx_buf); - if (skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; + info = IEEE80211_SKB_CB(skb); + + /* + * Tell TX path to send this frame even though the + * STA may still remain is PS mode after this frame + * exchange. Also set EOSP to indicate this packet + * ends the poll/service period. + */ + info->flags |= IEEE80211_TX_CTL_POLL_RESPONSE | + IEEE80211_TX_STATUS_EOSP | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); + + ieee80211_xmit(sdata, skb); +} + +static void +ieee80211_sta_ps_deliver_response(struct sta_info *sta, + int n_frames, u8 ignored_acs, + enum ieee80211_frame_release_type reason) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + bool found = false; + bool more_data = false; + int ac; + unsigned long driver_release_tids = 0; + struct sk_buff_head frames; + + /* Service or PS-Poll period starts */ + set_sta_flag(sta, WLAN_STA_SP); + + __skb_queue_head_init(&frames); + + /* + * Get response frame(s) and more data bit for it. + */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + unsigned long tids; + + if (ignored_acs & BIT(ac)) + continue; + + tids = ieee80211_tids_for_ac(ac); + + if (!found) { + driver_release_tids = sta->driver_buffered_tids & tids; + if (driver_release_tids) { + found = true; + } else { + struct sk_buff *skb; + + while (n_frames > 0) { + skb = skb_dequeue(&sta->tx_filtered[ac]); + if (!skb) { + skb = skb_dequeue( + &sta->ps_tx_buf[ac]); + if (skb) + local->total_ps_buffered--; + } + if (!skb) + break; + n_frames--; + found = true; + __skb_queue_tail(&frames, skb); + } + } + + /* + * If the driver has data on more than one TID then + * certainly there's more data if we release just a + * single frame now (from a single TID). + */ + if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && + hweight16(driver_release_tids) > 1) { + more_data = true; + driver_release_tids = + BIT(ffs(driver_release_tids) - 1); + break; + } + } + + if (!skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac])) { + more_data = true; + break; + } + } + + if (!found) { + int tid; /* - * Tell TX path to send this frame even though the STA may - * still remain is PS mode after this frame exchange. + * For PS-Poll, this can only happen due to a race condition + * when we set the TIM bit and the station notices it, but + * before it can poll for the frame we expire it. + * + * For uAPSD, this is said in the standard (11.2.1.5 h): + * At each unscheduled SP for a non-AP STA, the AP shall + * attempt to transmit at least one MSDU or MMPDU, but no + * more than the value specified in the Max SP Length field + * in the QoS Capability element from delivery-enabled ACs, + * that are destined for the non-AP STA. + * + * Since we have no other MSDU/MMPDU, transmit a QoS null frame. */ - info->flags |= IEEE80211_TX_CTL_PSPOLL_RESPONSE; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA %pM aid %d: PS Poll (entries after %d)\n", - sta->sta.addr, sta->sta.aid, - skb_queue_len(&sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + /* This will evaluate to 1, 3, 5 or 7. */ + tid = 7 - ((ffs(~ignored_acs) - 1) << 1); - /* Use MoreData flag to indicate whether there are more - * buffered frames for this STA */ - if (no_pending_pkts) - hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); - else - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); + ieee80211_send_null_response(sdata, sta, tid, reason); + return; + } - ieee80211_add_pending_skb(local, skb); + if (!driver_release_tids) { + struct sk_buff_head pending; + struct sk_buff *skb; + int num = 0; + u16 tids = 0; + + skb_queue_head_init(&pending); + + while ((skb = __skb_dequeue(&frames))) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *) skb->data; + u8 *qoshdr = NULL; + + num++; + + /* + * Tell TX path to send this frame even though the + * STA may still remain is PS mode after this frame + * exchange. + */ + info->flags |= IEEE80211_TX_CTL_POLL_RESPONSE; + + /* + * Use MoreData flag to indicate whether there are + * more buffered frames for this STA + */ + if (more_data || !skb_queue_empty(&frames)) + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + else + hdr->frame_control &= + cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + + if (ieee80211_is_data_qos(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) + qoshdr = ieee80211_get_qos_ctl(hdr); + + /* set EOSP for the frame */ + if (reason == IEEE80211_FRAME_RELEASE_UAPSD && + qoshdr && skb_queue_empty(&frames)) + *qoshdr |= IEEE80211_QOS_CTL_EOSP; + + info->flags |= IEEE80211_TX_STATUS_EOSP | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + if (qoshdr) + tids |= BIT(*qoshdr & IEEE80211_QOS_CTL_TID_MASK); + else + tids |= BIT(0); + + __skb_queue_tail(&pending, skb); + } - if (no_pending_pkts) - sta_info_clear_tim_bit(sta); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + drv_allow_buffered_frames(local, sta, tids, num, + reason, more_data); + + ieee80211_add_pending_skbs(local, &pending); + + sta_info_recalc_tim(sta); } else { /* - * FIXME: This can be the result of a race condition between - * us expiring a frame and the station polling for it. - * Should we send it a null-func frame indicating we - * have nothing buffered for it? + * We need to release a frame that is buffered somewhere in the + * driver ... it'll have to handle that. + * Note that, as per the comment above, it'll also have to see + * if there is more than just one frame on the specific TID that + * we're releasing from, and it needs to set the more-data bit + * accordingly if we tell it that there's no more data. If we do + * tell it there's more data, then of course the more-data bit + * needs to be set anyway. + */ + drv_release_buffered_frames(local, sta, driver_release_tids, + n_frames, reason, more_data); + + /* + * Note that we don't recalculate the TIM bit here as it would + * most likely have no effect at all unless the driver told us + * that the TID became empty before returning here from the + * release function. + * Either way, however, when the driver tells us that the TID + * became empty we'll do the TIM recalculation. */ - printk(KERN_DEBUG "%s: STA %pM sent PS Poll even " - "though there are no buffered frames for it\n", - sdata->name, sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } } +void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) +{ + u8 ignore_for_response = sta->sta.uapsd_queues; + + /* + * If all ACs are delivery-enabled then we should reply + * from any of them, if only some are enabled we reply + * only from the non-enabled ones. + */ + if (ignore_for_response == BIT(IEEE80211_NUM_ACS) - 1) + ignore_for_response = 0; + + ieee80211_sta_ps_deliver_response(sta, 1, ignore_for_response, + IEEE80211_FRAME_RELEASE_PSPOLL); +} + +void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta) +{ + int n_frames = sta->sta.max_sp; + u8 delivery_enabled = sta->sta.uapsd_queues; + + /* + * If we ever grow support for TSPEC this might happen if + * the TSPEC update from hostapd comes in between a trigger + * frame setting WLAN_STA_UAPSD in the RX path and this + * actually getting called. + */ + if (!delivery_enabled) + return; + + switch (sta->sta.max_sp) { + case 1: + n_frames = 2; + break; + case 2: + n_frames = 4; + break; + case 3: + n_frames = 6; + break; + case 0: + /* XXX: what is a good value? */ + n_frames = 8; + break; + } + + ieee80211_sta_ps_deliver_response(sta, n_frames, ~delivery_enabled, + IEEE80211_FRAME_RELEASE_UAPSD); +} + void ieee80211_sta_block_awake(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, bool block) { @@ -984,17 +1360,103 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, trace_api_sta_block_awake(sta->local, pubsta, block); if (block) - set_sta_flags(sta, WLAN_STA_PS_DRIVER); - else if (test_sta_flags(sta, WLAN_STA_PS_DRIVER)) + set_sta_flag(sta, WLAN_STA_PS_DRIVER); + else if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) ieee80211_queue_work(hw, &sta->drv_unblock_wk); } EXPORT_SYMBOL(ieee80211_sta_block_awake); -void ieee80211_sta_set_tim(struct ieee80211_sta *pubsta) +void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct ieee80211_local *local = sta->local; + struct sk_buff *skb; + struct skb_eosp_msg_data *data; + + trace_api_eosp(local, pubsta); + + skb = alloc_skb(0, GFP_ATOMIC); + if (!skb) { + /* too bad ... but race is better than loss */ + clear_sta_flag(sta, WLAN_STA_SP); + return; + } + + data = (void *)skb->cb; + memcpy(data->sta, pubsta->addr, ETH_ALEN); + memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN); + skb->pkt_type = IEEE80211_EOSP_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe); + +void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, + u8 tid, bool buffered) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - set_sta_flags(sta, WLAN_STA_PS_DRIVER_BUF); - sta_info_set_tim_bit(sta); + if (WARN_ON(tid >= STA_TID_NUM)) + return; + + if (buffered) + set_bit(tid, &sta->driver_buffered_tids); + else + clear_bit(tid, &sta->driver_buffered_tids); + + sta_info_recalc_tim(sta); +} +EXPORT_SYMBOL(ieee80211_sta_set_buffered); + +int sta_info_move_state_checked(struct sta_info *sta, + enum ieee80211_sta_state new_state) +{ + might_sleep(); + + if (sta->sta_state == new_state) + return 0; + + switch (new_state) { + case IEEE80211_STA_NONE: + if (sta->sta_state == IEEE80211_STA_AUTH) + clear_bit(WLAN_STA_AUTH, &sta->_flags); + else + return -EINVAL; + break; + case IEEE80211_STA_AUTH: + if (sta->sta_state == IEEE80211_STA_NONE) + set_bit(WLAN_STA_AUTH, &sta->_flags); + else if (sta->sta_state == IEEE80211_STA_ASSOC) + clear_bit(WLAN_STA_ASSOC, &sta->_flags); + else + return -EINVAL; + break; + case IEEE80211_STA_ASSOC: + if (sta->sta_state == IEEE80211_STA_AUTH) { + set_bit(WLAN_STA_ASSOC, &sta->_flags); + } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { + if (sta->sdata->vif.type == NL80211_IFTYPE_AP) + atomic_dec(&sta->sdata->u.ap.num_sta_authorized); + clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + } else + return -EINVAL; + break; + case IEEE80211_STA_AUTHORIZED: + if (sta->sta_state == IEEE80211_STA_ASSOC) { + if (sta->sdata->vif.type == NL80211_IFTYPE_AP) + atomic_inc(&sta->sdata->u.ap.num_sta_authorized); + set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + } else + return -EINVAL; + break; + default: + WARN(1, "invalid state %d", new_state); + return -EINVAL; + } + + printk(KERN_DEBUG "%s: moving STA %pM to state %d\n", + sta->sdata->name, sta->sta.addr, new_state); + sta->sta_state = new_state; + + return 0; } -EXPORT_SYMBOL(ieee80211_sta_set_tim); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 28beb78e601e..6f77f12dc3fc 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -19,7 +19,8 @@ /** * enum ieee80211_sta_info_flags - Stations flags * - * These flags are used with &struct sta_info's @flags member. + * These flags are used with &struct sta_info's @flags member, but + * only indirectly with set_sta_flag() and friends. * * @WLAN_STA_AUTH: Station is authenticated. * @WLAN_STA_ASSOC: Station is associated. @@ -29,7 +30,6 @@ * when virtual port control is not in use. * @WLAN_STA_SHORT_PREAMBLE: Station is capable of receiving short-preamble * frames. - * @WLAN_STA_ASSOC_AP: We're associated to that station, it is an AP. * @WLAN_STA_WME: Station is a QoS-STA. * @WLAN_STA_WDS: Station is one of our WDS peers. * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the @@ -43,29 +43,49 @@ * be in the queues * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping * station in power-save mode, reply when the driver unblocks. - * @WLAN_STA_PS_DRIVER_BUF: Station has frames pending in driver internal - * buffers. Automatically cleared on station wake-up. + * @WLAN_STA_TDLS_PEER: Station is a TDLS peer. + * @WLAN_STA_TDLS_PEER_AUTH: This TDLS peer is authorized to send direct + * packets. This means the link is enabled. + * @WLAN_STA_UAPSD: Station requested unscheduled SP while driver was + * keeping station in power-save mode, reply when the driver + * unblocks the station. + * @WLAN_STA_SP: Station is in a service period, so don't try to + * reply to other uAPSD trigger frames or PS-Poll. + * @WLAN_STA_4ADDR_EVENT: 4-addr event was already sent for this frame. */ enum ieee80211_sta_info_flags { - WLAN_STA_AUTH = 1<<0, - WLAN_STA_ASSOC = 1<<1, - WLAN_STA_PS_STA = 1<<2, - WLAN_STA_AUTHORIZED = 1<<3, - WLAN_STA_SHORT_PREAMBLE = 1<<4, - WLAN_STA_ASSOC_AP = 1<<5, - WLAN_STA_WME = 1<<6, - WLAN_STA_WDS = 1<<7, - WLAN_STA_CLEAR_PS_FILT = 1<<9, - WLAN_STA_MFP = 1<<10, - WLAN_STA_BLOCK_BA = 1<<11, - WLAN_STA_PS_DRIVER = 1<<12, - WLAN_STA_PSPOLL = 1<<13, - WLAN_STA_PS_DRIVER_BUF = 1<<14, + WLAN_STA_AUTH, + WLAN_STA_ASSOC, + WLAN_STA_PS_STA, + WLAN_STA_AUTHORIZED, + WLAN_STA_SHORT_PREAMBLE, + WLAN_STA_WME, + WLAN_STA_WDS, + WLAN_STA_CLEAR_PS_FILT, + WLAN_STA_MFP, + WLAN_STA_BLOCK_BA, + WLAN_STA_PS_DRIVER, + WLAN_STA_PSPOLL, + WLAN_STA_TDLS_PEER, + WLAN_STA_TDLS_PEER_AUTH, + WLAN_STA_UAPSD, + WLAN_STA_SP, + WLAN_STA_4ADDR_EVENT, +}; + +enum ieee80211_sta_state { + /* NOTE: These need to be ordered correctly! */ + IEEE80211_STA_NONE, + IEEE80211_STA_AUTH, + IEEE80211_STA_ASSOC, + IEEE80211_STA_AUTHORIZED, }; #define STA_TID_NUM 16 #define ADDBA_RESP_INTERVAL HZ -#define HT_AGG_MAX_RETRIES 0x3 +#define HT_AGG_MAX_RETRIES 15 +#define HT_AGG_BURST_RETRIES 3 +#define HT_AGG_RETRIES_PERIOD (15 * HZ) #define HT_AGG_STATE_DRV_READY 0 #define HT_AGG_STATE_RESPONSE_RECEIVED 1 @@ -78,6 +98,7 @@ enum ieee80211_sta_info_flags { * struct tid_ampdu_tx - TID aggregation information (Tx). * * @rcu_head: rcu head for freeing structure + * @session_timer: check if we keep Tx-ing on the TID (by timeout value) * @addba_resp_timer: timer for peer's response to addba request * @pending: pending frames queue -- use sta's spinlock to protect * @dialog_token: dialog token for aggregation session @@ -86,6 +107,8 @@ enum ieee80211_sta_info_flags { * @stop_initiator: initiator of a session stop * @tx_stop: TX DelBA frame when stopping * @buf_size: reorder buffer size at receiver + * @failed_bar_ssn: ssn of the last failed BAR tx attempt + * @bar_pending: BAR needs to be re-sent * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -98,6 +121,7 @@ enum ieee80211_sta_info_flags { */ struct tid_ampdu_tx { struct rcu_head rcu_head; + struct timer_list session_timer; struct timer_list addba_resp_timer; struct sk_buff_head pending; unsigned long state; @@ -106,6 +130,9 @@ struct tid_ampdu_tx { u8 stop_initiator; bool tx_stop; u8 buf_size; + + u16 failed_bar_ssn; + bool bar_pending; }; /** @@ -154,6 +181,7 @@ struct tid_ampdu_rx { * @tid_tx: aggregation info for Tx per TID * @tid_start_tx: sessions where start was requested * @addba_req_num: number of times addBA request has been sent. + * @last_addba_req_time: timestamp of the last addBA request. * @dialog_token_allocator: dialog token enumerator for each new session; * @work: work struct for starting/stopping aggregation * @tid_rx_timer_expired: bitmap indicating on which TIDs the @@ -173,6 +201,7 @@ struct sta_ampdu_mlme { struct work_struct work; struct tid_ampdu_tx __rcu *tid_tx[STA_TID_NUM]; struct tid_ampdu_tx *tid_start_tx[STA_TID_NUM]; + unsigned long last_addba_req_time[STA_TID_NUM]; u8 addba_req_num[STA_TID_NUM]; u8 dialog_token_allocator; }; @@ -198,15 +227,16 @@ struct sta_ampdu_mlme { * @last_rx_rate_flag: rx status flag of the last data packet * @lock: used for locking all fields that require locking, see comments * in the header file. - * @flaglock: spinlock for flags accesses * @drv_unblock_wk: used for driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP - * @flags: STA flags, see &enum ieee80211_sta_info_flags - * @ps_tx_buf: buffer of frames to transmit to this station - * when it leaves power saving state - * @tx_filtered: buffer of frames we already tried to transmit - * but were filtered by hardware due to STA having entered - * power saving state + * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly + * @ps_tx_buf: buffers (per AC) of frames to transmit to this station + * when it leaves power saving state or polls + * @tx_filtered: buffers (per AC) of frames we already tried to + * transmit but were filtered by hardware due to STA having + * entered power saving state, these are also delivered to + * the station when it leaves powersave or polls for frames + * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on * @rx_packets: Number of MSDUs received from this STA * @rx_bytes: Number of bytes received from this STA * @wep_weak_iv_count: number of weak WEP IVs received from this station @@ -238,10 +268,14 @@ struct sta_ampdu_mlme { * @plink_timer: peer link watch timer * @plink_timer_was_running: used by suspend/resume to restore timers * @debugfs: debug filesystem info - * @sta: station information we share with the driver * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver * @lost_packets: number of consecutive lost packets + * @dummy: indicate a dummy station created for receiving + * EAP frames before association + * @sta: station information we share with the driver + * @sta_state: duplicates information about station state (for debug) + * @beacon_loss_count: number of times beacon loss has triggered */ struct sta_info { /* General information, mostly static */ @@ -254,7 +288,6 @@ struct sta_info { struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; spinlock_t lock; - spinlock_t flaglock; struct work_struct drv_unblock_wk; @@ -264,18 +297,18 @@ struct sta_info { bool uploaded; - /* - * frequently updated, locked with own spinlock (flaglock), - * use the accessors defined below - */ - u32 flags; + enum ieee80211_sta_state sta_state; + + /* use the accessors defined below */ + unsigned long _flags; /* * STA powersave frame queues, no more than the internal * locking required. */ - struct sk_buff_head ps_tx_buf; - struct sk_buff_head tx_filtered; + struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; + struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; + unsigned long driver_buffered_tids; /* Updated from RX path only, no locking requirements */ unsigned long rx_packets, rx_bytes; @@ -335,6 +368,10 @@ struct sta_info { #endif unsigned int lost_packets; + unsigned int beacon_loss_count; + + /* should be right in front of sta to be in the same cache line */ + bool dummy; /* keep last! */ struct ieee80211_sta sta; @@ -348,62 +385,59 @@ static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta) return NL80211_PLINK_LISTEN; } -static inline void set_sta_flags(struct sta_info *sta, const u32 flags) +static inline void set_sta_flag(struct sta_info *sta, + enum ieee80211_sta_info_flags flag) { - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - sta->flags |= flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); + WARN_ON(flag == WLAN_STA_AUTH || + flag == WLAN_STA_ASSOC || + flag == WLAN_STA_AUTHORIZED); + set_bit(flag, &sta->_flags); } -static inline void clear_sta_flags(struct sta_info *sta, const u32 flags) +static inline void clear_sta_flag(struct sta_info *sta, + enum ieee80211_sta_info_flags flag) { - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - sta->flags &= ~flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); + WARN_ON(flag == WLAN_STA_AUTH || + flag == WLAN_STA_ASSOC || + flag == WLAN_STA_AUTHORIZED); + clear_bit(flag, &sta->_flags); } -static inline u32 test_sta_flags(struct sta_info *sta, const u32 flags) +static inline int test_sta_flag(struct sta_info *sta, + enum ieee80211_sta_info_flags flag) { - u32 ret; - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - ret = sta->flags & flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); - - return ret; + return test_bit(flag, &sta->_flags); } -static inline u32 test_and_clear_sta_flags(struct sta_info *sta, - const u32 flags) +static inline int test_and_clear_sta_flag(struct sta_info *sta, + enum ieee80211_sta_info_flags flag) { - u32 ret; - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - ret = sta->flags & flags; - sta->flags &= ~flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); - - return ret; + WARN_ON(flag == WLAN_STA_AUTH || + flag == WLAN_STA_ASSOC || + flag == WLAN_STA_AUTHORIZED); + return test_and_clear_bit(flag, &sta->_flags); } -static inline u32 get_sta_flags(struct sta_info *sta) +static inline int test_and_set_sta_flag(struct sta_info *sta, + enum ieee80211_sta_info_flags flag) { - u32 ret; - unsigned long irqfl; + WARN_ON(flag == WLAN_STA_AUTH || + flag == WLAN_STA_ASSOC || + flag == WLAN_STA_AUTHORIZED); + return test_and_set_bit(flag, &sta->_flags); +} - spin_lock_irqsave(&sta->flaglock, irqfl); - ret = sta->flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); +int sta_info_move_state_checked(struct sta_info *sta, + enum ieee80211_sta_state new_state); - return ret; +static inline void sta_info_move_state(struct sta_info *sta, + enum ieee80211_sta_state new_state) +{ + int ret = sta_info_move_state_checked(sta, new_state); + WARN_ON_ONCE(ret); } + void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); @@ -419,8 +453,8 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) #define STA_HASH(sta) (sta[5]) -/* Maximum number of frames to buffer per power saving station */ -#define STA_MAX_TX_BUFFER 128 +/* Maximum number of frames to buffer per power saving station per AC */ +#define STA_MAX_TX_BUFFER 64 /* Minimum buffered frame expiry time. If STA uses listen interval that is * smaller than this value, the minimum value here is used instead. */ @@ -436,9 +470,15 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr); +struct sta_info *sta_info_get_rx(struct ieee80211_sub_if_data *sdata, + const u8 *addr); + struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); +struct sta_info *sta_info_get_bss_rx(struct ieee80211_sub_if_data *sdata, + const u8 *addr); + static inline void for_each_sta_info_type_check(struct ieee80211_local *local, const u8 *addr, @@ -459,6 +499,22 @@ void for_each_sta_info_type_check(struct ieee80211_local *local, _sta = nxt, \ nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \ ) \ + /* run code only if address matches and it's not a dummy sta */ \ + if (memcmp(_sta->sta.addr, (_addr), ETH_ALEN) == 0 && \ + !_sta->dummy) + +#define for_each_sta_info_rx(local, _addr, _sta, nxt) \ + for ( /* initialise loop */ \ + _sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ + nxt = _sta ? rcu_dereference(_sta->hnext) : NULL; \ + /* typecheck */ \ + for_each_sta_info_type_check(local, (_addr), _sta, nxt),\ + /* continue condition */ \ + _sta; \ + /* advance loop */ \ + _sta = nxt, \ + nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \ + ) \ /* compare address and run code only if it matches */ \ if (memcmp(_sta->sta.addr, (_addr), ETH_ALEN) == 0) @@ -472,7 +528,10 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, * until sta_info_insert(). */ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, - u8 *addr, gfp_t gfp); + const u8 *addr, gfp_t gfp); + +void sta_info_free(struct ieee80211_local *local, struct sta_info *sta); + /* * Insert STA info into hash table/list, returns zero or a * -EEXIST if (if the same MAC address is already present). @@ -483,15 +542,14 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, */ int sta_info_insert(struct sta_info *sta); int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU); -int sta_info_insert_atomic(struct sta_info *sta); +int sta_info_reinsert(struct sta_info *sta); int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr); int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -void sta_info_set_tim_bit(struct sta_info *sta); -void sta_info_clear_tim_bit(struct sta_info *sta); +void sta_info_recalc_tim(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); @@ -502,5 +560,6 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); +void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); #endif /* STA_INFO_H */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 1658efaa2e8e..30c265c98f73 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -9,11 +9,13 @@ * published by the Free Software Foundation. */ +#include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "rate.h" #include "mesh.h" #include "led.h" +#include "wme.h" void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, @@ -43,6 +45,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; + int ac; /* * This skb 'survived' a round-trip through the driver, and @@ -63,11 +67,37 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, sta->tx_filtered_count++; /* + * Clear more-data bit on filtered frames, it might be set + * but later frames might time out so it might have to be + * clear again ... It's all rather unlikely (this frame + * should time out first, right?) but let's not confuse + * peers unnecessarily. + */ + if (hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_MOREDATA)) + hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_MOREDATA); + + if (ieee80211_is_data_qos(hdr->frame_control)) { + u8 *p = ieee80211_get_qos_ctl(hdr); + int tid = *p & IEEE80211_QOS_CTL_TID_MASK; + + /* + * Clear EOSP if set, this could happen e.g. + * if an absence period (us being a P2P GO) + * shortens the SP. + */ + if (*p & IEEE80211_QOS_CTL_EOSP) + *p &= ~IEEE80211_QOS_CTL_EOSP; + ac = ieee802_1d_to_ac[tid & 7]; + } else { + ac = IEEE80211_AC_BE; + } + + /* * Clear the TX filter mask for this STA when sending the next * packet. If the STA went to power save mode, this will happen * when it wakes up for the next time. */ - set_sta_flags(sta, WLAN_STA_CLEAR_PS_FILT); + set_sta_flag(sta, WLAN_STA_CLEAR_PS_FILT); /* * This code races in the following way: @@ -103,13 +133,19 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * changes before calling TX status events if ordering can be * unknown. */ - if (test_sta_flags(sta, WLAN_STA_PS_STA) && - skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { - skb_queue_tail(&sta->tx_filtered, skb); + if (test_sta_flag(sta, WLAN_STA_PS_STA) && + skb_queue_len(&sta->tx_filtered[ac]) < STA_MAX_TX_BUFFER) { + skb_queue_tail(&sta->tx_filtered[ac], skb); + sta_info_recalc_tim(sta); + + if (!timer_pending(&local->sta_cleanup)) + mod_timer(&local->sta_cleanup, + round_jiffies(jiffies + + STA_INFO_CLEANUP_INTERVAL)); return; } - if (!test_sta_flags(sta, WLAN_STA_PS_STA) && + if (!test_sta_flag(sta, WLAN_STA_PS_STA) && !(info->flags & IEEE80211_TX_INTFL_RETRIED)) { /* Software retry the packet once */ info->flags |= IEEE80211_TX_INTFL_RETRIED; @@ -121,18 +157,38 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, if (net_ratelimit()) wiphy_debug(local->hw.wiphy, "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n", - skb_queue_len(&sta->tx_filtered), - !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies); + skb_queue_len(&sta->tx_filtered[ac]), + !!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies); #endif dev_kfree_skb(skb); } +static void ieee80211_check_pending_bar(struct sta_info *sta, u8 *addr, u8 tid) +{ + struct tid_ampdu_tx *tid_tx; + + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx || !tid_tx->bar_pending) + return; + + tid_tx->bar_pending = false; + ieee80211_send_bar(&sta->sdata->vif, addr, tid, tid_tx->failed_bar_ssn); +} + static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *) skb->data; struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + if (ieee80211_is_data_qos(mgmt->frame_control)) { + struct ieee80211_hdr *hdr = (void *) skb->data; + u8 *qc = ieee80211_get_qos_ctl(hdr); + u16 tid = qc[0] & 0xf; + + ieee80211_check_pending_bar(sta, hdr->addr1, tid); + } + if (ieee80211_is_action(mgmt->frame_control) && sdata->vif.type == NL80211_IFTYPE_STATION && mgmt->u.action.category == WLAN_CATEGORY_HT && @@ -161,6 +217,114 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) } } +static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn) +{ + struct tid_ampdu_tx *tid_tx; + + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx) + return; + + tid_tx->failed_bar_ssn = ssn; + tid_tx->bar_pending = true; +} + +static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info) +{ + int len = sizeof(struct ieee80211_radiotap_header); + + /* IEEE80211_RADIOTAP_RATE rate */ + if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) + len += 2; + + /* IEEE80211_RADIOTAP_TX_FLAGS */ + len += 2; + + /* IEEE80211_RADIOTAP_DATA_RETRIES */ + len += 1; + + /* IEEE80211_TX_RC_MCS */ + if (info->status.rates[0].idx >= 0 && + info->status.rates[0].flags & IEEE80211_TX_RC_MCS) + len += 3; + + return len; +} + +static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band + *sband, struct sk_buff *skb, + int retry_count, int rtap_len) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_radiotap_header *rthdr; + unsigned char *pos; + u16 txflags; + + rthdr = (struct ieee80211_radiotap_header *) skb_push(skb, rtap_len); + + memset(rthdr, 0, rtap_len); + rthdr->it_len = cpu_to_le16(rtap_len); + rthdr->it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | + (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); + pos = (unsigned char *)(rthdr + 1); + + /* + * XXX: Once radiotap gets the bitmap reset thing the vendor + * extensions proposal contains, we can actually report + * the whole set of tries we did. + */ + + /* IEEE80211_RADIOTAP_RATE */ + if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) { + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); + *pos = sband->bitrates[info->status.rates[0].idx].bitrate / 5; + /* padding for tx flags */ + pos += 2; + } + + /* IEEE80211_RADIOTAP_TX_FLAGS */ + txflags = 0; + if (!(info->flags & IEEE80211_TX_STAT_ACK) && + !is_multicast_ether_addr(hdr->addr1)) + txflags |= IEEE80211_RADIOTAP_F_TX_FAIL; + + if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) + txflags |= IEEE80211_RADIOTAP_F_TX_CTS; + else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + txflags |= IEEE80211_RADIOTAP_F_TX_RTS; + + put_unaligned_le16(txflags, pos); + pos += 2; + + /* IEEE80211_RADIOTAP_DATA_RETRIES */ + /* for now report the total retry_count */ + *pos = retry_count; + pos++; + + /* IEEE80211_TX_RC_MCS */ + if (info->status.rates[0].idx >= 0 && + info->status.rates[0].flags & IEEE80211_TX_RC_MCS) { + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS | + IEEE80211_RADIOTAP_MCS_HAVE_GI | + IEEE80211_RADIOTAP_MCS_HAVE_BW; + if (info->status.rates[0].flags & IEEE80211_TX_RC_SHORT_GI) + pos[1] |= IEEE80211_RADIOTAP_MCS_SGI; + if (info->status.rates[0].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + pos[1] |= IEEE80211_RADIOTAP_MCS_BW_40; + if (info->status.rates[0].flags & IEEE80211_TX_RC_GREEN_FIELD) + pos[1] |= IEEE80211_RADIOTAP_MCS_FMT_GF; + pos[2] = info->status.rates[0].idx; + pos += 3; + } + +} + /* * Use a static threshold for now, best value to be determined * by testing ... @@ -176,10 +340,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - u16 frag, type; __le16 fc; struct ieee80211_supported_band *sband; - struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; struct sta_info *sta, *tmp; @@ -187,6 +349,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) int rates_idx = -1; bool send_to_cooked; bool acked; + struct ieee80211_bar *bar; + u16 tid; + int rtap_len; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { if (info->status.rates[i].idx < 0) { @@ -215,8 +380,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (memcmp(hdr->addr2, sta->sdata->vif.addr, ETH_ALEN)) continue; + if (info->flags & IEEE80211_TX_STATUS_EOSP) + clear_sta_flag(sta, WLAN_STA_SP); + acked = !!(info->flags & IEEE80211_TX_STAT_ACK); - if (!acked && test_sta_flags(sta, WLAN_STA_PS_STA)) { + if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) { /* * The STA is in power save mode, so assume * that this TX packet failed because of that. @@ -239,10 +407,31 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) tid = qc[0] & 0xf; ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10) & IEEE80211_SCTL_SEQ); - ieee80211_send_bar(sta->sdata, hdr->addr1, + ieee80211_send_bar(&sta->sdata->vif, hdr->addr1, tid, ssn); } + if (!acked && ieee80211_is_back_req(fc)) { + u16 control; + + /* + * BAR failed, store the last SSN and retry sending + * the BAR when the next unicast transmission on the + * same TID succeeds. + */ + bar = (struct ieee80211_bar *) skb->data; + control = le16_to_cpu(bar->control); + if (!(control & IEEE80211_BAR_CTRL_MULTI_TID)) { + u16 ssn = le16_to_cpu(bar->start_seq_num); + + tid = (control & + IEEE80211_BAR_CTRL_TID_INFO_MASK) >> + IEEE80211_BAR_CTRL_TID_INFO_SHIFT; + + ieee80211_set_bar_pending(sta, tid, ssn); + } + } + if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) { ieee80211_handle_filtered_frame(local, sta, skb); rcu_read_unlock(); @@ -286,12 +475,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) * Fragments are passed to low-level drivers as separate skbs, so these * are actually fragments, not frames. Update frame counters only for * the first fragment of the frame. */ - - frag = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; - type = le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_FTYPE; - if (info->flags & IEEE80211_TX_STAT_ACK) { - if (frag == 0) { + if (ieee80211_is_first_frag(hdr->seq_ctrl)) { local->dot11TransmittedFrameCount++; if (is_multicast_ether_addr(hdr->addr1)) local->dot11MulticastTransmittedFrameCount++; @@ -306,11 +491,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) * with a multicast address in the address 1 field of type Data * or Management. */ if (!is_multicast_ether_addr(hdr->addr1) || - type == IEEE80211_FTYPE_DATA || - type == IEEE80211_FTYPE_MGMT) + ieee80211_is_data(fc) || + ieee80211_is_mgmt(fc)) local->dot11TransmittedFragmentCount++; } else { - if (frag == 0) + if (ieee80211_is_first_frag(hdr->seq_ctrl)) local->dot11FailedCount++; } @@ -327,30 +512,54 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { - struct ieee80211_work *wk; u64 cookie = (unsigned long)skb; - rcu_read_lock(); - list_for_each_entry_rcu(wk, &local->work_list, list) { - if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX) - continue; - if (wk->offchan_tx.frame != skb) - continue; - wk->offchan_tx.frame = NULL; - break; - } - rcu_read_unlock(); - if (local->hw_roc_skb_for_status == skb) { - cookie = local->hw_roc_cookie ^ 2; - local->hw_roc_skb_for_status = NULL; - } + if (ieee80211_is_nullfunc(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) { + bool acked = info->flags & IEEE80211_TX_STAT_ACK; + cfg80211_probe_status(skb->dev, hdr->addr1, + cookie, acked, GFP_ATOMIC); + } else { + struct ieee80211_work *wk; + + rcu_read_lock(); + list_for_each_entry_rcu(wk, &local->work_list, list) { + if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX) + continue; + if (wk->offchan_tx.frame != skb) + continue; + wk->offchan_tx.status = true; + break; + } + rcu_read_unlock(); + if (local->hw_roc_skb_for_status == skb) { + cookie = local->hw_roc_cookie ^ 2; + local->hw_roc_skb_for_status = NULL; + } - if (cookie == local->hw_offchan_tx_cookie) - local->hw_offchan_tx_cookie = 0; + cfg80211_mgmt_tx_status( + skb->dev, cookie, skb->data, skb->len, + !!(info->flags & IEEE80211_TX_STAT_ACK), + GFP_ATOMIC); + } + } - cfg80211_mgmt_tx_status( - skb->dev, cookie, skb->data, skb->len, - !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); + if (unlikely(info->ack_frame_id)) { + struct sk_buff *ack_skb; + unsigned long flags; + + spin_lock_irqsave(&local->ack_status_lock, flags); + ack_skb = idr_find(&local->ack_status_frames, + info->ack_frame_id); + if (ack_skb) + idr_remove(&local->ack_status_frames, + info->ack_frame_id); + spin_unlock_irqrestore(&local->ack_status_lock, flags); + + /* consumes ack_skb */ + if (ack_skb) + skb_complete_wifi_ack(ack_skb, + info->flags & IEEE80211_TX_STAT_ACK); } /* this was a transmitted frame, but now we want to reuse it */ @@ -358,7 +567,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) /* Need to make a copy before skb->cb gets cleared */ send_to_cooked = !!(info->flags & IEEE80211_TX_CTL_INJECTED) || - (type != IEEE80211_FTYPE_DATA); + !(ieee80211_is_data(fc)); /* * This is a bit racy but we can avoid a lot of work @@ -370,44 +579,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } /* send frame to monitor interfaces now */ - - if (skb_headroom(skb) < sizeof(*rthdr)) { + rtap_len = ieee80211_tx_radiotap_len(info); + if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) { printk(KERN_ERR "ieee80211_tx_status: headroom too small\n"); dev_kfree_skb(skb); return; } - - rthdr = (struct ieee80211_tx_status_rtap_hdr *) - skb_push(skb, sizeof(*rthdr)); - - memset(rthdr, 0, sizeof(*rthdr)); - rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); - rthdr->hdr.it_present = - cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | - (1 << IEEE80211_RADIOTAP_DATA_RETRIES) | - (1 << IEEE80211_RADIOTAP_RATE)); - - if (!(info->flags & IEEE80211_TX_STAT_ACK) && - !is_multicast_ether_addr(hdr->addr1)) - rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); - - /* - * XXX: Once radiotap gets the bitmap reset thing the vendor - * extensions proposal contains, we can actually report - * the whole set of tries we did. - */ - if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || - (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) - rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); - else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) - rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); - if (info->status.rates[0].idx >= 0 && - !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) - rthdr->rate = sband->bitrates[ - info->status.rates[0].idx].bitrate / 5; - - /* for now report the total retry_count */ - rthdr->data_retries = retry_count; + ieee80211_add_tx_radiotap_header(sband, skb, retry_count, rtap_len); /* XXX: is this sufficient for BPF? */ skb_set_mac_header(skb, 0); @@ -454,3 +632,29 @@ void ieee80211_report_low_ack(struct ieee80211_sta *pubsta, u32 num_packets) num_packets, GFP_ATOMIC); } EXPORT_SYMBOL(ieee80211_report_low_ack); + +void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + if (unlikely(info->ack_frame_id)) { + struct sk_buff *ack_skb; + unsigned long flags; + + spin_lock_irqsave(&local->ack_status_lock, flags); + ack_skb = idr_find(&local->ack_status_frames, + info->ack_frame_id); + if (ack_skb) + idr_remove(&local->ack_status_frames, + info->ack_frame_id); + spin_unlock_irqrestore(&local->ack_status_lock, flags); + + /* consumes ack_skb */ + if (ack_skb) + dev_kfree_skb_any(ack_skb); + } + + dev_kfree_skb_any(skb); +} +EXPORT_SYMBOL(ieee80211_free_txskb); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index f49d00a4c7fd..51077a956a83 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -10,6 +10,7 @@ #include <linux/bitops.h> #include <linux/types.h> #include <linux/netdevice.h> +#include <linux/export.h> #include <asm/unaligned.h> #include <net/mac80211.h> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8cb0d2d0ac69..edcd1c7ab83f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -18,6 +18,7 @@ #include <linux/etherdevice.h> #include <linux/bitmap.h> #include <linux/rcupdate.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> @@ -35,7 +36,8 @@ /* misc utils */ -static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, +static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, + struct sk_buff *skb, int group_addr, int next_frag_len) { int rate, mrate, erp, dur, i; @@ -43,7 +45,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, struct ieee80211_local *local = tx->local; struct ieee80211_supported_band *sband; struct ieee80211_hdr *hdr; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); /* assume HW handles this */ if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS) @@ -75,7 +77,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, * at the highest possible rate belonging to the PHY rates in the * BSSBasicRateSet */ - hdr = (struct ieee80211_hdr *)tx->skb->data; + hdr = (struct ieee80211_hdr *)skb->data; if (ieee80211_is_ctl(hdr->frame_control)) { /* TODO: These control frames are not currently sent by * mac80211, but should they be implemented, this function @@ -149,11 +151,15 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, rate = mrate; } - /* Time needed to transmit ACK - * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up - * to closest integer */ - - dur = ieee80211_frame_duration(local, 10, rate, erp, + /* Don't calculate ACKs for QoS Frames with NoAck Policy set */ + if (ieee80211_is_data_qos(hdr->frame_control) && + *(ieee80211_get_qos_ctl(hdr)) | IEEE80211_QOS_CTL_ACK_POLICY_NOACK) + dur = 0; + else + /* Time needed to transmit ACK + * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up + * to closest integer */ + dur = ieee80211_frame_duration(local, 10, rate, erp, tx->sdata->vif.bss_conf.use_short_preamble); if (next_frag_len) { @@ -253,7 +259,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - u32 sta_flags; + bool assoc = false; if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) return TX_CONTINUE; @@ -284,11 +290,11 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (tx->flags & IEEE80211_TX_PS_BUFFERED) return TX_CONTINUE; - sta_flags = tx->sta ? get_sta_flags(tx->sta) : 0; + if (tx->sta) + assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); if (likely(tx->flags & IEEE80211_TX_UNICAST)) { - if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && - tx->sdata->vif.type != NL80211_IFTYPE_ADHOC && + if (unlikely(!assoc && ieee80211_is_data(hdr->frame_control))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: dropped data frame to not " @@ -298,17 +304,14 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); return TX_DROP; } - } else { - if (unlikely(ieee80211_is_data(hdr->frame_control) && - tx->local->num_sta == 0 && - tx->sdata->vif.type != NL80211_IFTYPE_ADHOC)) { - /* - * No associated STAs - no need to send multicast - * frames. - */ - return TX_DROP; - } - return TX_CONTINUE; + } else if (unlikely(tx->sdata->vif.type == NL80211_IFTYPE_AP && + ieee80211_is_data(hdr->frame_control) && + !atomic_read(&tx->sdata->u.ap.num_sta_authorized))) { + /* + * No associated STAs - no need to send multicast + * frames. + */ + return TX_DROP; } return TX_CONTINUE; @@ -343,13 +346,22 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) total += skb_queue_len(&ap->ps_bc_buf); } + /* + * Drop one frame from each station from the lowest-priority + * AC that has frames at all. + */ list_for_each_entry_rcu(sta, &local->sta_list, list) { - skb = skb_dequeue(&sta->ps_tx_buf); - if (skb) { - purged++; - dev_kfree_skb(skb); + int ac; + + for (ac = IEEE80211_AC_BK; ac >= IEEE80211_AC_VO; ac--) { + skb = skb_dequeue(&sta->ps_tx_buf[ac]); + total += skb_queue_len(&sta->ps_tx_buf[ac]); + if (skb) { + purged++; + dev_kfree_skb(skb); + break; + } } - total += skb_queue_len(&sta->ps_tx_buf); } rcu_read_unlock(); @@ -418,7 +430,7 @@ static int ieee80211_use_mfp(__le16 fc, struct sta_info *sta, if (!ieee80211_is_mgmt(fc)) return 0; - if (sta == NULL || !test_sta_flags(sta, WLAN_STA_MFP)) + if (sta == NULL || !test_sta_flag(sta, WLAN_STA_MFP)) return 0; if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) @@ -435,7 +447,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; struct ieee80211_local *local = tx->local; - u32 staflags; if (unlikely(!sta || ieee80211_is_probe_resp(hdr->frame_control) || @@ -444,57 +455,52 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) ieee80211_is_reassoc_resp(hdr->frame_control))) return TX_CONTINUE; - staflags = get_sta_flags(sta); + if (unlikely((test_sta_flag(sta, WLAN_STA_PS_STA) || + test_sta_flag(sta, WLAN_STA_PS_DRIVER)) && + !(info->flags & IEEE80211_TX_CTL_POLL_RESPONSE))) { + int ac = skb_get_queue_mapping(tx->skb); - if (unlikely((staflags & (WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) && - !(info->flags & IEEE80211_TX_CTL_PSPOLL_RESPONSE))) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA %pM aid %d: PS buffer (entries " - "before %d)\n", - sta->sta.addr, sta->sta.aid, - skb_queue_len(&sta->ps_tx_buf)); + printk(KERN_DEBUG "STA %pM aid %d: PS buffer for AC %d\n", + sta->sta.addr, sta->sta.aid, ac); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) purge_old_ps_buffers(tx->local); - if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { - struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); + if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) { + struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: STA %pM TX " - "buffer full - dropping oldest frame\n", - tx->sdata->name, sta->sta.addr); - } + if (net_ratelimit()) + printk(KERN_DEBUG "%s: STA %pM TX buffer for " + "AC %d full - dropping oldest frame\n", + tx->sdata->name, sta->sta.addr, ac); #endif dev_kfree_skb(old); } else tx->local->total_ps_buffered++; - /* - * Queue frame to be sent after STA wakes up/polls, - * but don't set the TIM bit if the driver is blocking - * wakeup or poll response transmissions anyway. - */ - if (skb_queue_empty(&sta->ps_tx_buf) && - !(staflags & WLAN_STA_PS_DRIVER)) - sta_info_set_tim_bit(sta); - info->control.jiffies = jiffies; info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - skb_queue_tail(&sta->ps_tx_buf, tx->skb); + skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb); if (!timer_pending(&local->sta_cleanup)) mod_timer(&local->sta_cleanup, round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); + /* + * We queued up some frames, so the TIM bit might + * need to be set, recalculate it. + */ + sta_info_recalc_tim(sta); + return TX_QUEUED; } #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(staflags & WLAN_STA_PS_STA)) { - printk(KERN_DEBUG "%s: STA %pM in PS mode, but pspoll " - "set -> send frame\n", tx->sdata->name, - sta->sta.addr); + else if (unlikely(test_sta_flag(sta, WLAN_STA_PS_STA))) { + printk(KERN_DEBUG + "%s: STA %pM in PS mode, but polling/in SP -> send frame\n", + tx->sdata->name, sta->sta.addr); } #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ @@ -552,7 +558,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) !(info->flags & IEEE80211_TX_CTL_INJECTED) && (!ieee80211_is_robust_mgmt_frame(hdr) || (ieee80211_is_action(hdr->frame_control) && - tx->sta && test_sta_flags(tx->sta, WLAN_STA_MFP)))) { + tx->sta && test_sta_flag(tx->sta, WLAN_STA_MFP)))) { I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); return TX_DROP; } else @@ -567,8 +573,6 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) switch (tx->key->conf.cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: - if (ieee80211_is_auth(hdr->frame_control)) - break; case WLAN_CIPHER_SUITE_TKIP: if (!ieee80211_is_data_present(hdr->frame_control)) tx->key = NULL; @@ -611,7 +615,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) u32 len; bool inval = false, rts = false, short_preamble = false; struct ieee80211_tx_rate_control txrc; - u32 sta_flags; + bool assoc = false; memset(&txrc, 0, sizeof(txrc)); @@ -632,6 +636,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || + tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || tx->sdata->vif.type == NL80211_IFTYPE_ADHOC); /* set up RTS protection if desired */ @@ -647,17 +652,17 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) */ if (tx->sdata->vif.bss_conf.use_short_preamble && (ieee80211_is_data(hdr->frame_control) || - (tx->sta && test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) + (tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) txrc.short_preamble = short_preamble = true; - sta_flags = tx->sta ? get_sta_flags(tx->sta) : 0; + if (tx->sta) + assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); /* * Lets not bother rate control if we're associated and cannot * talk to the sta. This should not happen. */ - if (WARN(test_bit(SCAN_SW_SCANNING, &tx->local->scanning) && - (sta_flags & WLAN_STA_ASSOC) && + if (WARN(test_bit(SCAN_SW_SCANNING, &tx->local->scanning) && assoc && !rate_usable_index_exists(sband, &tx->sta->sta), "%s: Dropped data frame as no usable bitrate found while " "scanning and associated. Target station: " @@ -800,6 +805,9 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) if (ieee80211_hdrlen(hdr->frame_control) < 24) return TX_CONTINUE; + if (ieee80211_is_qos_nullfunc(hdr->frame_control)) + return TX_CONTINUE; + /* * Anything but QoS data that has a sequence number field * (is long enough) gets a sequence number from the global @@ -836,11 +844,13 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) return TX_CONTINUE; } -static int ieee80211_fragment(struct ieee80211_local *local, +static int ieee80211_fragment(struct ieee80211_tx_data *tx, struct sk_buff *skb, int hdrlen, int frag_threshold) { - struct sk_buff *tail = skb, *tmp; + struct ieee80211_local *local = tx->local; + struct ieee80211_tx_info *info; + struct sk_buff *tmp; int per_fragm = frag_threshold - hdrlen - FCS_LEN; int pos = hdrlen + per_fragm; int rem = skb->len - hdrlen - per_fragm; @@ -848,6 +858,8 @@ static int ieee80211_fragment(struct ieee80211_local *local, if (WARN_ON(rem < 0)) return -EINVAL; + /* first fragment was already added to queue by caller */ + while (rem) { int fraglen = per_fragm; @@ -860,12 +872,21 @@ static int ieee80211_fragment(struct ieee80211_local *local, IEEE80211_ENCRYPT_TAILROOM); if (!tmp) return -ENOMEM; - tail->next = tmp; - tail = tmp; + + __skb_queue_tail(&tx->skbs, tmp); + skb_reserve(tmp, local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM); /* copy control information */ memcpy(tmp->cb, skb->cb, sizeof(tmp->cb)); + + info = IEEE80211_SKB_CB(tmp); + info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | + IEEE80211_TX_CTL_FIRST_FRAGMENT); + + if (rem) + info->flags |= IEEE80211_TX_CTL_MORE_FRAMES; + skb_copy_queue_mapping(tmp, skb); tmp->priority = skb->priority; tmp->dev = skb->dev; @@ -877,6 +898,7 @@ static int ieee80211_fragment(struct ieee80211_local *local, pos += fraglen; } + /* adjust first fragment's length */ skb->len = hdrlen + per_fragm; return 0; } @@ -891,7 +913,14 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) int hdrlen; int fragnum; - if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) + /* no matter what happens, tx->skb moves to tx->skbs */ + __skb_queue_tail(&tx->skbs, skb); + tx->skb = NULL; + + if (info->flags & IEEE80211_TX_CTL_DONTFRAG) + return TX_CONTINUE; + + if (tx->local->ops->set_frag_threshold) return TX_CONTINUE; /* @@ -904,7 +933,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) hdrlen = ieee80211_hdrlen(hdr->frame_control); - /* internal error, why is TX_FRAGMENTED set? */ + /* internal error, why isn't DONTFRAG set? */ if (WARN_ON(skb->len + FCS_LEN <= frag_threshold)) return TX_DROP; @@ -916,21 +945,21 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) * of the fragments then we will simply pretend to accept the skb * but store it away as pending. */ - if (ieee80211_fragment(tx->local, skb, hdrlen, frag_threshold)) + if (ieee80211_fragment(tx, skb, hdrlen, frag_threshold)) return TX_DROP; /* update duration/seq/flags of fragments */ fragnum = 0; - do { + + skb_queue_walk(&tx->skbs, skb) { int next_len; const __le16 morefrags = cpu_to_le16(IEEE80211_FCTL_MOREFRAGS); hdr = (void *)skb->data; info = IEEE80211_SKB_CB(skb); - if (skb->next) { + if (!skb_queue_is_last(&tx->skbs, skb)) { hdr->frame_control |= morefrags; - next_len = skb->next->len; /* * No multi-rate retries for fragmented frames, that * would completely throw off the NAV at other STAs. @@ -945,10 +974,9 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) hdr->frame_control &= ~morefrags; next_len = 0; } - hdr->duration_id = ieee80211_duration(tx, 0, next_len); hdr->seq_ctrl |= cpu_to_le16(fragnum & IEEE80211_SCTL_FRAG); fragnum++; - } while ((skb = skb->next)); + } return TX_CONTINUE; } @@ -956,16 +984,16 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) { - struct sk_buff *skb = tx->skb; + struct sk_buff *skb; if (!tx->sta) return TX_CONTINUE; tx->sta->tx_packets++; - do { + skb_queue_walk(&tx->skbs, skb) { tx->sta->tx_fragments++; tx->sta->tx_bytes += skb->len; - } while ((skb = skb->next)); + } return TX_CONTINUE; } @@ -1004,121 +1032,31 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx) { - struct sk_buff *skb = tx->skb; + struct sk_buff *skb; struct ieee80211_hdr *hdr; int next_len; bool group_addr; - do { + skb_queue_walk(&tx->skbs, skb) { hdr = (void *) skb->data; if (unlikely(ieee80211_is_pspoll(hdr->frame_control))) break; /* must not overwrite AID */ - next_len = skb->next ? skb->next->len : 0; + if (!skb_queue_is_last(&tx->skbs, skb)) { + struct sk_buff *next = skb_queue_next(&tx->skbs, skb); + next_len = next->len; + } else + next_len = 0; group_addr = is_multicast_ether_addr(hdr->addr1); hdr->duration_id = - ieee80211_duration(tx, group_addr, next_len); - } while ((skb = skb->next)); + ieee80211_duration(tx, skb, group_addr, next_len); + } return TX_CONTINUE; } /* actual transmit path */ -/* - * deal with packet injection down monitor interface - * with Radiotap Header -- only called for monitor mode interface - */ -static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, - struct sk_buff *skb) -{ - /* - * this is the moment to interpret and discard the radiotap header that - * must be at the start of the packet injected in Monitor mode - * - * Need to take some care with endian-ness since radiotap - * args are little-endian - */ - - struct ieee80211_radiotap_iterator iterator; - struct ieee80211_radiotap_header *rthdr = - (struct ieee80211_radiotap_header *) skb->data; - bool hw_frag; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, - NULL); - - info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - tx->flags &= ~IEEE80211_TX_FRAGMENTED; - - /* packet is fragmented in HW if we have a non-NULL driver callback */ - hw_frag = (tx->local->ops->set_frag_threshold != NULL); - - /* - * for every radiotap entry that is present - * (ieee80211_radiotap_iterator_next returns -ENOENT when no more - * entries present, or -EINVAL on error) - */ - - while (!ret) { - ret = ieee80211_radiotap_iterator_next(&iterator); - - if (ret) - continue; - - /* see if this argument is something we can use */ - switch (iterator.this_arg_index) { - /* - * You must take care when dereferencing iterator.this_arg - * for multibyte types... the pointer is not aligned. Use - * get_unaligned((type *)iterator.this_arg) to dereference - * iterator.this_arg for type "type" safely on all arches. - */ - case IEEE80211_RADIOTAP_FLAGS: - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { - /* - * this indicates that the skb we have been - * handed has the 32-bit FCS CRC at the end... - * we should react to that by snipping it off - * because it will be recomputed and added - * on transmission - */ - if (skb->len < (iterator._max_length + FCS_LEN)) - return false; - - skb_trim(skb, skb->len - FCS_LEN); - } - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) - info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT; - if ((*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) && - !hw_frag) - tx->flags |= IEEE80211_TX_FRAGMENTED; - break; - - /* - * Please update the file - * Documentation/networking/mac80211-injection.txt - * when parsing new fields here. - */ - - default: - break; - } - } - - if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ - return false; - - /* - * remove the radiotap header - * iterator->_max_length was sanity-checked against - * skb->len by iterator init - */ - skb_pull(skb, iterator._max_length); - - return true; -} - static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, struct sk_buff *skb, struct ieee80211_tx_info *info, @@ -1126,9 +1064,11 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, int tid) { bool queued = false; + bool reset_agg_timer = false; if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { info->flags |= IEEE80211_TX_CTL_AMPDU; + reset_agg_timer = true; } else if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { /* * nothing -- this aggregation session is being started @@ -1160,6 +1100,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, /* do nothing, let packet pass through */ } else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { info->flags |= IEEE80211_TX_CTL_AMPDU; + reset_agg_timer = true; } else { queued = true; info->control.vif = &tx->sdata->vif; @@ -1169,6 +1110,11 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, spin_unlock(&tx->sta->lock); } + /* reset session timer */ + if (reset_agg_timer && tid_tx->timeout) + mod_timer(&tid_tx->session_timer, + TU_TO_EXP_TIME(tid_tx->timeout)); + return queued; } @@ -1183,7 +1129,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int hdrlen, tid; + int tid; u8 *qc; memset(tx, 0, sizeof(*tx)); @@ -1191,26 +1137,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->local = local; tx->sdata = sdata; tx->channel = local->hw.conf.channel; - /* - * Set this flag (used below to indicate "automatic fragmentation"), - * it will be cleared/left by radiotap as desired. - * Only valid when fragmentation is done by the stack. - */ - if (!local->ops->set_frag_threshold) - tx->flags |= IEEE80211_TX_FRAGMENTED; - - /* process and remove the injection radiotap header */ - if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) { - if (!__ieee80211_parse_tx_radiotap(tx, skb)) - return TX_DROP; - - /* - * __ieee80211_parse_tx_radiotap has now removed - * the radiotap header that was present and pre-filled - * 'tx' with tx control information. - */ - info->flags &= ~IEEE80211_TX_INTFL_HAS_RADIOTAP; - } + __skb_queue_head_init(&tx->skbs); /* * If this flag is set to true anywhere, and we get here, @@ -1232,7 +1159,9 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->sta = sta_info_get(sdata, hdr->addr1); if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && - (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) { + !ieee80211_is_qos_nullfunc(hdr->frame_control) && + (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) && + !(local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) { struct tid_ampdu_tx *tid_tx; qc = ieee80211_get_qos_ctl(hdr); @@ -1253,54 +1182,38 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, if (is_multicast_ether_addr(hdr->addr1)) { tx->flags &= ~IEEE80211_TX_UNICAST; info->flags |= IEEE80211_TX_CTL_NO_ACK; - } else { + } else tx->flags |= IEEE80211_TX_UNICAST; - if (unlikely(local->wifi_wme_noack_test)) - info->flags |= IEEE80211_TX_CTL_NO_ACK; - else - info->flags &= ~IEEE80211_TX_CTL_NO_ACK; - } - if (tx->flags & IEEE80211_TX_FRAGMENTED) { - if ((tx->flags & IEEE80211_TX_UNICAST) && - skb->len + FCS_LEN > local->hw.wiphy->frag_threshold && - !(info->flags & IEEE80211_TX_CTL_AMPDU)) - tx->flags |= IEEE80211_TX_FRAGMENTED; - else - tx->flags &= ~IEEE80211_TX_FRAGMENTED; + if (!(info->flags & IEEE80211_TX_CTL_DONTFRAG)) { + if (!(tx->flags & IEEE80211_TX_UNICAST) || + skb->len + FCS_LEN <= local->hw.wiphy->frag_threshold || + info->flags & IEEE80211_TX_CTL_AMPDU) + info->flags |= IEEE80211_TX_CTL_DONTFRAG; } if (!tx->sta) info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - else if (test_and_clear_sta_flags(tx->sta, WLAN_STA_CLEAR_PS_FILT)) + else if (test_and_clear_sta_flag(tx->sta, WLAN_STA_CLEAR_PS_FILT)) info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { - u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; - tx->ethertype = (pos[0] << 8) | pos[1]; - } info->flags |= IEEE80211_TX_CTL_FIRST_FRAGMENT; return TX_CONTINUE; } -/* - * Returns false if the frame couldn't be transmitted but was queued instead. - */ -static bool __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp, - struct sta_info *sta, bool txpending) +static bool ieee80211_tx_frags(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct sk_buff_head *skbs, + bool txpending) { - struct sk_buff *skb = *skbp, *next; + struct sk_buff *skb, *tmp; struct ieee80211_tx_info *info; - struct ieee80211_sub_if_data *sdata; unsigned long flags; - int len; - bool fragm = false; - while (skb) { + skb_queue_walk_safe(skbs, skb, tmp) { int q = skb_get_queue_mapping(skb); - __le16 fc; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (local->queue_stop_reasons[q] || @@ -1310,24 +1223,11 @@ static bool __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp, * transmission from the tx-pending tasklet when the * queue is woken again. */ - - do { - next = skb->next; - skb->next = NULL; - /* - * NB: If txpending is true, next must already - * be NULL since we must've gone through this - * loop before already; therefore we can just - * queue the frame to the head without worrying - * about reordering of fragments. - */ - if (unlikely(txpending)) - __skb_queue_head(&local->pending[q], - skb); - else - __skb_queue_tail(&local->pending[q], - skb); - } while ((skb = next)); + if (txpending) + skb_queue_splice_init(skbs, &local->pending[q]); + else + skb_queue_splice_tail_init(skbs, + &local->pending[q]); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -1336,47 +1236,72 @@ static bool __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); info = IEEE80211_SKB_CB(skb); + info->control.vif = vif; + info->control.sta = sta; - if (fragm) - info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | - IEEE80211_TX_CTL_FIRST_FRAGMENT); - - next = skb->next; - len = skb->len; + __skb_unlink(skb, skbs); + drv_tx(local, skb); + } - if (next) - info->flags |= IEEE80211_TX_CTL_MORE_FRAMES; + return true; +} - sdata = vif_to_sdata(info->control.vif); +/* + * Returns false if the frame couldn't be transmitted but was queued instead. + */ +static bool __ieee80211_tx(struct ieee80211_local *local, + struct sk_buff_head *skbs, int led_len, + struct sta_info *sta, bool txpending) +{ + struct ieee80211_tx_info *info; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_vif *vif; + struct ieee80211_sta *pubsta; + struct sk_buff *skb; + bool result = true; + __le16 fc; - switch (sdata->vif.type) { - case NL80211_IFTYPE_MONITOR: - info->control.vif = NULL; - break; - case NL80211_IFTYPE_AP_VLAN: - info->control.vif = &container_of(sdata->bss, - struct ieee80211_sub_if_data, u.ap)->vif; - break; - default: - /* keep */ - break; - } + if (WARN_ON(skb_queue_empty(skbs))) + return true; - if (sta && sta->uploaded) - info->control.sta = &sta->sta; - else - info->control.sta = NULL; + skb = skb_peek(skbs); + fc = ((struct ieee80211_hdr *)skb->data)->frame_control; + info = IEEE80211_SKB_CB(skb); + sdata = vif_to_sdata(info->control.vif); + if (sta && !sta->uploaded) + sta = NULL; - fc = ((struct ieee80211_hdr *)skb->data)->frame_control; - drv_tx(local, skb); + if (sta) + pubsta = &sta->sta; + else + pubsta = NULL; - ieee80211_tpt_led_trig_tx(local, fc, len); - *skbp = skb = next; - ieee80211_led_tx(local, 1); - fragm = true; + switch (sdata->vif.type) { + case NL80211_IFTYPE_MONITOR: + sdata = NULL; + vif = NULL; + break; + case NL80211_IFTYPE_AP_VLAN: + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + /* fall through */ + default: + vif = &sdata->vif; + break; } - return true; + if (local->ops->tx_frags) + drv_tx_frags(local, vif, pubsta, skbs); + else + result = ieee80211_tx_frags(local, vif, pubsta, skbs, + txpending); + + ieee80211_tpt_led_trig_tx(local, fc, led_len); + ieee80211_led_tx(local, 1); + + WARN_ON_ONCE(!skb_queue_empty(skbs)); + + return result; } /* @@ -1385,8 +1310,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp, */ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) { - struct sk_buff *skb = tx->skb; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); ieee80211_tx_result res = TX_DROP; #define CALL_TXH(txh) \ @@ -1404,8 +1328,11 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); - if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { + __skb_queue_tail(&tx->skbs, tx->skb); + tx->skb = NULL; goto txh_done; + } CALL_TXH(ieee80211_tx_h_michael_mic_add); CALL_TXH(ieee80211_tx_h_sequence); @@ -1420,13 +1347,10 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) txh_done: if (unlikely(res == TX_DROP)) { I802_DEBUG_INC(tx->local->tx_handlers_drop); - while (skb) { - struct sk_buff *next; - - next = skb->next; - dev_kfree_skb(skb); - skb = next; - } + if (tx->skb) + dev_kfree_skb(tx->skb); + else + __skb_queue_purge(&tx->skbs); return -1; } else if (unlikely(res == TX_QUEUED)) { I802_DEBUG_INC(tx->local->tx_handlers_queued); @@ -1447,6 +1371,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, ieee80211_tx_result res_prepare; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); bool result = true; + int led_len; if (unlikely(skb->len < 10)) { dev_kfree_skb(skb); @@ -1456,6 +1381,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); /* initialises tx */ + led_len = skb->len; res_prepare = ieee80211_tx_prepare(sdata, &tx, skb); if (unlikely(res_prepare == TX_DROP)) { @@ -1469,7 +1395,8 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, info->band = tx.channel->band; if (!invoke_tx_handlers(&tx)) - result = __ieee80211_tx(local, &tx.skb, tx.sta, txpending); + result = __ieee80211_tx(local, &tx.skbs, led_len, + tx.sta, txpending); out: rcu_read_unlock(); return result; @@ -1490,11 +1417,6 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, tail_need = max_t(int, tail_need, 0); } - if (head_need || tail_need) { - /* Sorry. Can't account for this any more */ - skb_orphan(skb); - } - if (skb_cloned(skb)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) @@ -1508,67 +1430,19 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, return -ENOMEM; } - /* update truesize too */ - skb->truesize += head_need + tail_need; - return 0; } -static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *tmp_sdata; int headroom; bool may_encrypt; rcu_read_lock(); - if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) { - int hdrlen; - u16 len_rthdr; - - info->flags |= IEEE80211_TX_CTL_INJECTED | - IEEE80211_TX_INTFL_HAS_RADIOTAP; - - len_rthdr = ieee80211_get_radiotap_len(skb->data); - hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); - hdrlen = ieee80211_hdrlen(hdr->frame_control); - - /* check the header is complete in the frame */ - if (likely(skb->len >= len_rthdr + hdrlen)) { - /* - * We process outgoing injected frames that have a - * local address we handle as though they are our - * own frames. - * This code here isn't entirely correct, the local - * MAC address is not necessarily enough to find - * the interface to use; for that proper VLAN/WDS - * support we will need a different mechanism. - */ - - list_for_each_entry_rcu(tmp_sdata, &local->interfaces, - list) { - if (!ieee80211_sdata_running(tmp_sdata)) - continue; - if (tmp_sdata->vif.type == - NL80211_IFTYPE_MONITOR || - tmp_sdata->vif.type == - NL80211_IFTYPE_AP_VLAN || - tmp_sdata->vif.type == - NL80211_IFTYPE_WDS) - continue; - if (compare_ether_addr(tmp_sdata->vif.addr, - hdr->addr2) == 0) { - sdata = tmp_sdata; - break; - } - } - } - } - may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT); headroom = local->tx_headroom; @@ -1589,17 +1463,100 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, if (ieee80211_vif_is_mesh(&sdata->vif) && ieee80211_is_data(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) - if (mesh_nexthop_lookup(skb, sdata)) { + if (mesh_nexthop_resolve(skb, sdata)) { /* skb queued: don't free */ rcu_read_unlock(); return; } - ieee80211_set_qos_hdr(local, skb); + ieee80211_set_qos_hdr(sdata, skb); ieee80211_tx(sdata, skb, false); rcu_read_unlock(); } +static bool ieee80211_parse_tx_radiotap(struct sk_buff *skb) +{ + struct ieee80211_radiotap_iterator iterator; + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, + NULL); + u16 txflags; + + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_CTL_DONTFRAG; + + /* + * for every radiotap entry that is present + * (ieee80211_radiotap_iterator_next returns -ENOENT when no more + * entries present, or -EINVAL on error) + */ + + while (!ret) { + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + switch (iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + case IEEE80211_RADIOTAP_FLAGS: + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { + /* + * this indicates that the skb we have been + * handed has the 32-bit FCS CRC at the end... + * we should react to that by snipping it off + * because it will be recomputed and added + * on transmission + */ + if (skb->len < (iterator._max_length + FCS_LEN)) + return false; + + skb_trim(skb, skb->len - FCS_LEN); + } + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) + info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT; + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) + info->flags &= ~IEEE80211_TX_CTL_DONTFRAG; + break; + + case IEEE80211_RADIOTAP_TX_FLAGS: + txflags = get_unaligned_le16(iterator.this_arg); + if (txflags & IEEE80211_RADIOTAP_F_TX_NOACK) + info->flags |= IEEE80211_TX_CTL_NO_ACK; + break; + + /* + * Please update the file + * Documentation/networking/mac80211-injection.txt + * when parsing new fields here. + */ + + default: + break; + } + } + + if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ + return false; + + /* + * remove the radiotap header + * iterator->_max_length was sanity-checked against + * skb->len by iterator init + */ + skb_pull(skb, iterator._max_length); + + return true; +} + netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -1608,7 +1565,10 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr; + struct ieee80211_sub_if_data *tmp_sdata, *sdata; u16 len_rthdr; + int hdrlen; /* * Frame injection is not allowed if beaconing is not allowed @@ -1659,12 +1619,65 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, skb_set_network_header(skb, len_rthdr); skb_set_transport_header(skb, len_rthdr); + if (skb->len < len_rthdr + 2) + goto fail; + + hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + if (skb->len < len_rthdr + hdrlen) + goto fail; + + /* + * Initialize skb->protocol if the injected frame is a data frame + * carrying a rfc1042 header + */ + if (ieee80211_is_data(hdr->frame_control) && + skb->len >= len_rthdr + hdrlen + sizeof(rfc1042_header) + 2) { + u8 *payload = (u8 *)hdr + hdrlen; + + if (compare_ether_addr(payload, rfc1042_header) == 0) + skb->protocol = cpu_to_be16((payload[6] << 8) | + payload[7]); + } + memset(info, 0, sizeof(*info)); - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_CTL_INJECTED; + + /* process and remove the injection radiotap header */ + if (!ieee80211_parse_tx_radiotap(skb)) + goto fail; + + rcu_read_lock(); + + /* + * We process outgoing injected frames that have a local address + * we handle as though they are non-injected frames. + * This code here isn't entirely correct, the local MAC address + * isn't always enough to find the interface to use; for proper + * VLAN/WDS support we will need a different mechanism (which + * likely isn't going to be monitor interfaces). + */ + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + list_for_each_entry_rcu(tmp_sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(tmp_sdata)) + continue; + if (tmp_sdata->vif.type == NL80211_IFTYPE_MONITOR || + tmp_sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + tmp_sdata->vif.type == NL80211_IFTYPE_WDS) + continue; + if (compare_ether_addr(tmp_sdata->vif.addr, hdr->addr2) == 0) { + sdata = tmp_sdata; + break; + } + } + + ieee80211_xmit(sdata, skb); + rcu_read_unlock(); - /* pass the radiotap header up to xmit */ - ieee80211_xmit(IEEE80211_DEV_TO_SUB_IF(dev), skb); return NETDEV_TX_OK; fail: @@ -1703,8 +1716,11 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, int encaps_len, skip_header_bytes; int nh_pos, h_pos; struct sta_info *sta = NULL; - u32 sta_flags = 0; - struct sk_buff *tmp_skb; + bool wme_sta = false, authorized = false, tdls_auth = false; + bool tdls_direct = false; + bool multicast; + u32 info_flags = 0; + u16 info_id = 0; if (unlikely(skb->len < ETH_HLEN)) { ret = NETDEV_TX_OK; @@ -1728,7 +1744,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, memcpy(hdr.addr3, skb->data, ETH_ALEN); memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 30; - sta_flags = get_sta_flags(sta); + authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); + wme_sta = test_sta_flag(sta, WLAN_STA_WME); } rcu_read_unlock(); if (sta) @@ -1816,11 +1833,50 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; #endif case NL80211_IFTYPE_STATION: - memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); - if (sdata->u.mgd.use_4addr && - cpu_to_be16(ethertype) != sdata->control_port_protocol) { - fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); + if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { + bool tdls_peer = false; + + rcu_read_lock(); + sta = sta_info_get(sdata, skb->data); + if (sta) { + authorized = test_sta_flag(sta, + WLAN_STA_AUTHORIZED); + wme_sta = test_sta_flag(sta, WLAN_STA_WME); + tdls_peer = test_sta_flag(sta, + WLAN_STA_TDLS_PEER); + tdls_auth = test_sta_flag(sta, + WLAN_STA_TDLS_PEER_AUTH); + } + rcu_read_unlock(); + + /* + * If the TDLS link is enabled, send everything + * directly. Otherwise, allow TDLS setup frames + * to be transmitted indirectly. + */ + tdls_direct = tdls_peer && (tdls_auth || + !(ethertype == ETH_P_TDLS && skb->len > 14 && + skb->data[14] == WLAN_TDLS_SNAP_RFTYPE)); + } + + if (tdls_direct) { + /* link during setup - throw out frames to peer */ + if (!tdls_auth) { + ret = NETDEV_TX_OK; + goto fail; + } + + /* DA SA BSSID */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, sdata->u.mgd.bssid, ETH_ALEN); + hdrlen = 24; + } else if (sdata->u.mgd.use_4addr && + cpu_to_be16(ethertype) != sdata->control_port_protocol) { + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS); /* RA TA DA SA */ + memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data, ETH_ALEN); memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); @@ -1828,6 +1884,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, } else { fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ + memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); memcpy(hdr.addr3, skb->data, ETH_ALEN); hdrlen = 24; @@ -1850,16 +1907,23 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, * if it is a multicast address (which can only happen * in AP mode) */ - if (!is_multicast_ether_addr(hdr.addr1)) { + multicast = is_multicast_ether_addr(hdr.addr1); + if (!multicast) { rcu_read_lock(); sta = sta_info_get(sdata, hdr.addr1); - if (sta) - sta_flags = get_sta_flags(sta); + if (sta) { + authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); + wme_sta = test_sta_flag(sta, WLAN_STA_WME); + } rcu_read_unlock(); } + /* For mesh, the use of the QoS header is mandatory */ + if (ieee80211_vif_is_mesh(&sdata->vif)) + wme_sta = true; + /* receiver and we are QoS enabled, use a QoS type frame */ - if ((sta_flags & WLAN_STA_WME) && local->hw.queues >= 4) { + if (wme_sta && local->hw.queues >= 4) { fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); hdrlen += 2; } @@ -1868,12 +1932,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, * Drop unicast frames to unauthorised stations unless they are * EAPOL frames from the local station. */ - if (!ieee80211_vif_is_mesh(&sdata->vif) && - unlikely(!is_multicast_ether_addr(hdr.addr1) && - !(sta_flags & WLAN_STA_AUTHORIZED) && - !(cpu_to_be16(ethertype) == sdata->control_port_protocol && - compare_ether_addr(sdata->vif.addr, - skb->data + ETH_ALEN) == 0))) { + if (unlikely(!ieee80211_vif_is_mesh(&sdata->vif) && + !is_multicast_ether_addr(hdr.addr1) && !authorized && + (cpu_to_be16(ethertype) != sdata->control_port_protocol || + compare_ether_addr(sdata->vif.addr, skb->data + ETH_ALEN)))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: dropped frame to %pM" @@ -1887,11 +1949,54 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } + if (unlikely(!multicast && skb->sk && + skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) { + struct sk_buff *orig_skb = skb; + + skb = skb_clone(skb, GFP_ATOMIC); + if (skb) { + unsigned long flags; + int id, r; + + spin_lock_irqsave(&local->ack_status_lock, flags); + r = idr_get_new_above(&local->ack_status_frames, + orig_skb, 1, &id); + if (r == -EAGAIN) { + idr_pre_get(&local->ack_status_frames, + GFP_ATOMIC); + r = idr_get_new_above(&local->ack_status_frames, + orig_skb, 1, &id); + } + if (WARN_ON(!id) || id > 0xffff) { + idr_remove(&local->ack_status_frames, id); + r = -ERANGE; + } + spin_unlock_irqrestore(&local->ack_status_lock, flags); + + if (!r) { + info_id = id; + info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + } else if (skb_shared(skb)) { + kfree_skb(orig_skb); + } else { + kfree_skb(skb); + skb = orig_skb; + } + } else { + /* couldn't clone -- lose tx status ... */ + skb = orig_skb; + } + } + /* * If the skb is shared we need to obtain our own copy. */ if (skb_shared(skb)) { - tmp_skb = skb; + struct sk_buff *tmp_skb = skb; + + /* can't happen -- skb is a clone if info_id != 0 */ + WARN_ON(info_id); + skb = skb_clone(skb, GFP_ATOMIC); kfree_skb(tmp_skb); @@ -1992,6 +2097,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, memset(info, 0, sizeof(*info)); dev->trans_start = jiffies; + + info->flags = info_flags; + info->ack_frame_id = info_id; + ieee80211_xmit(sdata, skb); return NETDEV_TX_OK; @@ -2035,10 +2144,15 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) { result = ieee80211_tx(sdata, skb, true); } else { + struct sk_buff_head skbs; + + __skb_queue_head_init(&skbs); + __skb_queue_tail(&skbs, skb); + hdr = (struct ieee80211_hdr *)skb->data; sta = sta_info_get(sdata, hdr->addr1); - result = __ieee80211_tx(local, &skb, sta, true); + result = __ieee80211_tx(local, &skbs, skb->len, sta, true); } return result; @@ -2151,10 +2265,10 @@ static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss, /* Bitmap control */ *pos++ = n1 | aid0; /* Part Virt Bitmap */ + skb_put(skb, n2 - n1); memcpy(pos, bss->tim + n1, n2 - n1 + 1); tim[1] = n2 - n1 + 4; - skb_put(skb, n2 - n1); } else { *pos++ = aid0; /* Bitmap control */ *pos++ = 0; /* Part Virt Bitmap */ @@ -2219,9 +2333,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, } else { unsigned long flags; - spin_lock_irqsave(&local->sta_lock, flags); + spin_lock_irqsave(&local->tim_lock, flags); ieee80211_beacon_add_tim(ap, skb, beacon); - spin_unlock_irqrestore(&local->sta_lock, flags); + spin_unlock_irqrestore(&local->tim_lock, flags); } if (tim_offset) @@ -2252,22 +2366,31 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, } else if (ieee80211_vif_is_mesh(&sdata->vif)) { struct ieee80211_mgmt *mgmt; u8 *pos; + int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + + sizeof(mgmt->u.beacon); #ifdef CONFIG_MAC80211_MESH if (!sdata->u.mesh.mesh_id_len) goto out; #endif - /* headroom, head length, tail length and maximum TIM length */ - skb = dev_alloc_skb(local->tx_headroom + 400 + - sdata->u.mesh.ie_len); + skb = dev_alloc_skb(local->tx_headroom + + hdr_len + + 2 + /* NULL SSID */ + 2 + 8 + /* supported rates */ + 2 + 3 + /* DS params */ + 2 + (IEEE80211_MAX_SUPP_RATES - 8) + + 2 + sizeof(struct ieee80211_ht_cap) + + 2 + sizeof(struct ieee80211_ht_info) + + 2 + sdata->u.mesh.mesh_id_len + + 2 + sizeof(struct ieee80211_meshconf_ie) + + sdata->u.mesh.ie_len); if (!skb) goto out; skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) - skb_put(skb, 24 + sizeof(mgmt->u.beacon)); - memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); + memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); memset(mgmt->da, 0xff, ETH_ALEN); @@ -2275,13 +2398,25 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(sdata->vif.bss_conf.beacon_int); - mgmt->u.beacon.capab_info = 0x0; /* 0x0 for MPs */ + mgmt->u.beacon.capab_info |= cpu_to_le16( + sdata->u.mesh.security ? WLAN_CAPABILITY_PRIVACY : 0); pos = skb_put(skb, 2); *pos++ = WLAN_EID_SSID; *pos++ = 0x0; - mesh_mgmt_ies_add(skb, sdata); + if (ieee80211_add_srates_ie(&sdata->vif, skb) || + mesh_add_ds_params_ie(skb, sdata) || + ieee80211_add_ext_srates_ie(&sdata->vif, skb) || + mesh_add_rsn_ie(skb, sdata) || + mesh_add_ht_cap_ie(skb, sdata) || + mesh_add_ht_info_ie(skb, sdata) || + mesh_add_meshid_ie(skb, sdata) || + mesh_add_meshconf_ie(skb, sdata) || + mesh_add_vendor_ies(skb, sdata)) { + pr_err("o11s: couldn't add ies!\n"); + goto out; + } } else { WARN_ON(1); goto out; @@ -2318,6 +2453,37 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_beacon_get_tim); +struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_if_ap *ap = NULL; + struct sk_buff *presp = NULL, *skb = NULL; + struct ieee80211_hdr *hdr; + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (sdata->vif.type != NL80211_IFTYPE_AP) + return NULL; + + rcu_read_lock(); + + ap = &sdata->u.ap; + presp = rcu_dereference(ap->probe_resp); + if (!presp) + goto out; + + skb = skb_copy(presp, GFP_ATOMIC); + if (!skb) + goto out; + + hdr = (struct ieee80211_hdr *) skb->data; + memset(hdr->addr1, 0, sizeof(hdr->addr1)); + +out: + rcu_read_unlock(); + return skb; +} +EXPORT_SYMBOL(ieee80211_proberesp_get); + struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { @@ -2335,11 +2501,9 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, local = sdata->local; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for " - "pspoll template\n", sdata->name); + if (!skb) return NULL; - } + skb_reserve(skb, local->hw.extra_tx_headroom); pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll)); @@ -2375,11 +2539,9 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, local = sdata->local; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " - "template\n", sdata->name); + if (!skb) return NULL; - } + skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = (struct ieee80211_hdr_3addr *) skb_put(skb, @@ -2414,11 +2576,8 @@ struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw, skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*hdr) + ie_ssid_len + ie_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for probe " - "request template\n", sdata->name); + if (!skb) return NULL; - } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -2537,15 +2696,15 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_get_buffered_bc); -void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) +void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, int tid) { skb_set_mac_header(skb, 0); skb_set_network_header(skb, 0); skb_set_transport_header(skb, 0); - /* Send all internal mgmt frames on VO. Accordingly set TID to 7. */ - skb_set_queue_mapping(skb, IEEE80211_AC_VO); - skb->priority = 7; + skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); + skb->priority = tid; /* * The other path calling ieee80211_xmit is from the tasklet, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ddeb1b998383..9919892575f4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -13,6 +13,7 @@ #include <net/mac80211.h> #include <linux/netdevice.h> +#include <linux/export.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/skbuff.h> @@ -96,13 +97,13 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx) { - struct sk_buff *skb = tx->skb; + struct sk_buff *skb; struct ieee80211_hdr *hdr; - do { + skb_queue_walk(&tx->skbs, skb) { hdr = (struct ieee80211_hdr *) skb->data; hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - } while ((skb = skb->next)); + } } int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, @@ -368,14 +369,14 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } -int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, - struct sk_buff_head *skbs, - void (*fn)(void *data), void *data) +void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, + struct sk_buff_head *skbs, + void (*fn)(void *data), void *data) { struct ieee80211_hw *hw = &local->hw; struct sk_buff *skb; unsigned long flags; - int queue, ret = 0, i; + int queue, i; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for (i = 0; i < hw->queues; i++) @@ -390,7 +391,6 @@ int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, continue; } - ret++; queue = skb_get_queue_mapping(skb); __skb_queue_tail(&local->pending[queue], skb); } @@ -402,14 +402,12 @@ int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, __ieee80211_wake_queue(hw, i, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - - return ret; } -int ieee80211_add_pending_skbs(struct ieee80211_local *local, - struct sk_buff_head *skbs) +void ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs) { - return ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); + ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); } void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, @@ -567,12 +565,6 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_queue_delayed_work); -void ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems) -{ - ieee802_11_parse_elems_crc(start, len, elems, 0, 0); -} - u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, struct ieee802_11_elems *elems, u64 filter, u32 crc) @@ -685,9 +677,9 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, if (elen >= sizeof(struct ieee80211_meshconf_ie)) elems->mesh_config = (void *)pos; break; - case WLAN_EID_PEER_LINK: - elems->peer_link = pos; - elems->peer_link_len = elen; + case WLAN_EID_PEER_MGMT: + elems->peering = pos; + elems->peering_len = elen; break; case WLAN_EID_PREQ: elems->preq = pos; @@ -739,6 +731,12 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, return crc; } +void ieee802_11_parse_elems(u8 *start, size_t len, + struct ieee802_11_elems *elems) +{ + ieee802_11_parse_elems_crc(start, len, elems, 0, 0); +} + void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -799,8 +797,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) qparam.uapsd = false; - local->tx_conf[queue] = qparam; - drv_conf_tx(local, queue, &qparam); + sdata->tx_conf[queue] = qparam; + drv_conf_tx(local, sdata, queue, &qparam); } /* after reinitialize QoS TX queues setting to default, @@ -874,11 +872,9 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 6 + extra_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for auth " - "frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); @@ -983,23 +979,9 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, offset = noffset; } - if (sband->ht_cap.ht_supported) { - u16 cap = sband->ht_cap.cap; - __le16 tmp; - - *pos++ = WLAN_EID_HT_CAPABILITY; - *pos++ = sizeof(struct ieee80211_ht_cap); - memset(pos, 0, sizeof(struct ieee80211_ht_cap)); - tmp = cpu_to_le16(cap); - memcpy(pos, &tmp, sizeof(u16)); - pos += sizeof(u16); - *pos++ = sband->ht_cap.ampdu_factor | - (sband->ht_cap.ampdu_density << - IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); - memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); - pos += sizeof(sband->ht_cap.mcs); - pos += 2 + 4 + 1; /* ext info, BF cap, antsel */ - } + if (sband->ht_cap.ht_supported) + pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, + sband->ht_cap.cap); /* * If adding more here, adjust code in main.c @@ -1031,11 +1013,8 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, /* FIXME: come up with a proper value */ buf = kmalloc(200 + ie_len, GFP_KERNEL); - if (!buf) { - printk(KERN_DEBUG "%s: failed to allocate temporary IE " - "buffer\n", sdata->name); + if (!buf) return NULL; - } /* * Do not send DS Channel parameter for directed probe requests @@ -1055,6 +1034,8 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, skb = ieee80211_probereq_get(&local->hw, &sdata->vif, ssid, ssid_len, buf, buf_len); + if (!skb) + goto out; if (dst) { mgmt = (struct ieee80211_mgmt *) skb->data; @@ -1063,6 +1044,8 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, } IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + + out: kfree(buf); return skb; @@ -1071,14 +1054,18 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len, - u32 ratemask, bool directed) + u32 ratemask, bool directed, bool no_cck) { struct sk_buff *skb; skb = ieee80211_build_probe_req(sdata, dst, ratemask, ssid, ssid_len, ie, ie_len, directed); - if (skb) + if (skb) { + if (no_cck) + IEEE80211_SKB_CB(skb)->flags |= + IEEE80211_TX_CTL_NO_CCK_RATE; ieee80211_tx_skb(sdata, skb); + } } u32 ieee80211_sta_get_rates(struct ieee80211_local *local, @@ -1155,16 +1142,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) */ } #endif - - /* setup fragmentation threshold */ - drv_set_frag_threshold(local, hw->wiphy->frag_threshold); - - /* setup RTS threshold */ - drv_set_rts_threshold(local, hw->wiphy->rts_threshold); - - /* reset coverage class */ - drv_set_coverage_class(local, hw->wiphy->coverage_class); - /* everything else happens only if HW was up & running */ if (!local->open_count) goto wake_up; @@ -1183,6 +1160,15 @@ int ieee80211_reconfig(struct ieee80211_local *local) return res; } + /* setup fragmentation threshold */ + drv_set_frag_threshold(local, hw->wiphy->frag_threshold); + + /* setup RTS threshold */ + drv_set_rts_threshold(local, hw->wiphy->rts_threshold); + + /* reset coverage class */ + drv_set_coverage_class(local, hw->wiphy->coverage_class); + ieee80211_led_radio(local, true); ieee80211_mod_tpt_led_trig(local, IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); @@ -1192,7 +1178,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_MONITOR && ieee80211_sdata_running(sdata)) - res = drv_add_interface(local, &sdata->vif); + res = drv_add_interface(local, sdata); } /* add STAs back */ @@ -1211,8 +1197,15 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); /* reconfigure tx conf */ - for (i = 0; i < hw->queues; i++) - drv_conf_tx(local, i, &local->tx_conf[i]); + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MONITOR || + !ieee80211_sdata_running(sdata)) + continue; + + for (i = 0; i < hw->queues; i++) + drv_conf_tx(local, sdata, i, &sdata->tx_conf[i]); + } /* reconfigure hardware */ ieee80211_hw_config(local, ~0); @@ -1235,11 +1228,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_BEACON_INT | BSS_CHANGED_BSSID | BSS_CHANGED_CQM | - BSS_CHANGED_QOS; + BSS_CHANGED_QOS | + BSS_CHANGED_IDLE; switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: - changed |= BSS_CHANGED_ASSOC; + changed |= BSS_CHANGED_ASSOC | + BSS_CHANGED_ARP_FILTER; mutex_lock(&sdata->u.mgd.mtx); ieee80211_bss_info_change_notify(sdata, changed); mutex_unlock(&sdata->u.mgd.mtx); @@ -1248,6 +1243,12 @@ int ieee80211_reconfig(struct ieee80211_local *local) changed |= BSS_CHANGED_IBSS; /* fall through */ case NL80211_IFTYPE_AP: + changed |= BSS_CHANGED_SSID; + + if (sdata->vif.type == NL80211_IFTYPE_AP) + changed |= BSS_CHANGED_AP_PROBE_RESP; + + /* fall through */ case NL80211_IFTYPE_MESH_POINT: changed |= BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED; @@ -1268,6 +1269,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } + ieee80211_recalc_ps(local, -1); + /* * Clear the WLAN_STA_BLOCK_BA flag so new aggregation * sessions can be established after a resume. @@ -1283,7 +1286,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) list_for_each_entry(sta, &local->sta_list, list) { ieee80211_sta_tear_down_BA_sessions(sta, true); - clear_sta_flags(sta, WLAN_STA_BLOCK_BA); + clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } mutex_unlock(&local->sta_mtx); @@ -1522,3 +1525,162 @@ void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif) _ieee80211_enable_rssi_reports(sdata, 0, 0); } EXPORT_SYMBOL(ieee80211_disable_rssi_reports); + +u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, + u16 cap) +{ + __le16 tmp; + + *pos++ = WLAN_EID_HT_CAPABILITY; + *pos++ = sizeof(struct ieee80211_ht_cap); + memset(pos, 0, sizeof(struct ieee80211_ht_cap)); + + /* capability flags */ + tmp = cpu_to_le16(cap); + memcpy(pos, &tmp, sizeof(u16)); + pos += sizeof(u16); + + /* AMPDU parameters */ + *pos++ = ht_cap->ampdu_factor | + (ht_cap->ampdu_density << + IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); + + /* MCS set */ + memcpy(pos, &ht_cap->mcs, sizeof(ht_cap->mcs)); + pos += sizeof(ht_cap->mcs); + + /* extended capabilities */ + pos += sizeof(__le16); + + /* BF capabilities */ + pos += sizeof(__le32); + + /* antenna selection */ + pos += sizeof(u8); + + return pos; +} + +u8 *ieee80211_ie_build_ht_info(u8 *pos, + struct ieee80211_sta_ht_cap *ht_cap, + struct ieee80211_channel *channel, + enum nl80211_channel_type channel_type) +{ + struct ieee80211_ht_info *ht_info; + /* Build HT Information */ + *pos++ = WLAN_EID_HT_INFORMATION; + *pos++ = sizeof(struct ieee80211_ht_info); + ht_info = (struct ieee80211_ht_info *)pos; + ht_info->control_chan = + ieee80211_frequency_to_channel(channel->center_freq); + switch (channel_type) { + case NL80211_CHAN_HT40MINUS: + ht_info->ht_param = IEEE80211_HT_PARAM_CHA_SEC_BELOW; + break; + case NL80211_CHAN_HT40PLUS: + ht_info->ht_param = IEEE80211_HT_PARAM_CHA_SEC_ABOVE; + break; + case NL80211_CHAN_HT20: + default: + ht_info->ht_param = IEEE80211_HT_PARAM_CHA_SEC_NONE; + break; + } + if (ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) + ht_info->ht_param |= IEEE80211_HT_PARAM_CHAN_WIDTH_ANY; + + /* + * Note: According to 802.11n-2009 9.13.3.1, HT Protection field and + * RIFS Mode are reserved in IBSS mode, therefore keep them at 0 + */ + ht_info->operation_mode = 0x0000; + ht_info->stbc_param = 0x0000; + + /* It seems that Basic MCS set and Supported MCS set + are identical for the first 10 bytes */ + memset(&ht_info->basic_set, 0, 16); + memcpy(&ht_info->basic_set, &ht_cap->mcs, 10); + + return pos + sizeof(struct ieee80211_ht_info); +} + +enum nl80211_channel_type +ieee80211_ht_info_to_channel_type(struct ieee80211_ht_info *ht_info) +{ + enum nl80211_channel_type channel_type; + + if (!ht_info) + return NL80211_CHAN_NO_HT; + + switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_NONE: + channel_type = NL80211_CHAN_HT20; + break; + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + channel_type = NL80211_CHAN_HT40PLUS; + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + channel_type = NL80211_CHAN_HT40MINUS; + break; + default: + channel_type = NL80211_CHAN_NO_HT; + } + + return channel_type; +} + +int ieee80211_add_srates_ie(struct ieee80211_vif *vif, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + int rate; + u8 i, rates, *pos; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + rates = sband->n_bitrates; + if (rates > 8) + rates = 8; + + if (skb_tailroom(skb) < rates + 2) + return -ENOMEM; + + pos = skb_put(skb, rates + 2); + *pos++ = WLAN_EID_SUPP_RATES; + *pos++ = rates; + for (i = 0; i < rates; i++) { + rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + + return 0; +} + +int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + int rate; + u8 i, exrates, *pos; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + exrates = sband->n_bitrates; + if (exrates > 8) + exrates -= 8; + else + exrates = 0; + + if (skb_tailroom(skb) < exrates + 2) + return -ENOMEM; + + if (exrates) { + pos = skb_put(skb, exrates + 2); + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = exrates; + for (i = 8; i < sband->n_bitrates; i++) { + rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + } + return 0; +} diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index a1c6bfd55f0f..68ad351479df 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -330,13 +330,12 @@ ieee80211_crypto_wep_encrypt(struct ieee80211_tx_data *tx) ieee80211_tx_set_protected(tx); - skb = tx->skb; - do { + skb_queue_walk(&tx->skbs, skb) { if (wep_encrypt_skb(tx, skb) < 0) { I802_DEBUG_INC(tx->local->tx_handlers_drop_wep); return TX_DROP; } - } while ((skb = skb->next)); + } return TX_CONTINUE; } diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 7a49532f14cb..89511be3111e 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -52,6 +52,30 @@ static int wme_downgrade_ac(struct sk_buff *skb) } } +/* Indicate which queue to use for this fully formed 802.11 frame */ +u16 ieee80211_select_queue_80211(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_hdr *hdr) +{ + u8 *p; + + if (local->hw.queues < 4) + return 0; + + if (!ieee80211_is_data(hdr->frame_control)) { + skb->priority = 7; + return ieee802_1d_to_ac[skb->priority]; + } + if (!ieee80211_is_data_qos(hdr->frame_control)) { + skb->priority = 0; + return ieee802_1d_to_ac[skb->priority]; + } + + p = ieee80211_get_qos_ctl(hdr); + skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK; + + return ieee80211_downgrade_queue(local, skb); +} /* Indicate which queue to use. */ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, @@ -72,7 +96,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP_VLAN: sta = rcu_dereference(sdata->u.vlan.sta); if (sta) { - qos = get_sta_flags(sta) & WLAN_STA_WME; + qos = test_sta_flag(sta, WLAN_STA_WME); break; } case NL80211_IFTYPE_AP: @@ -83,11 +107,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - /* - * XXX: This is clearly broken ... but already was before, - * because ieee80211_fill_mesh_addresses() would clear A1 - * except for multicast addresses. - */ + qos = true; break; #endif case NL80211_IFTYPE_STATION: @@ -103,7 +123,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, if (!sta && ra && !is_multicast_ether_addr(ra)) { sta = sta_info_get(sdata, ra); if (sta) - qos = get_sta_flags(sta) & WLAN_STA_WME; + qos = test_sta_flag(sta, WLAN_STA_WME); } rcu_read_unlock(); @@ -139,21 +159,31 @@ u16 ieee80211_downgrade_queue(struct ieee80211_local *local, return ieee802_1d_to_ac[skb->priority]; } -void ieee80211_set_qos_hdr(struct ieee80211_local *local, struct sk_buff *skb) +void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_hdr *hdr = (void *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); /* Fill in the QoS header if there is one. */ if (ieee80211_is_data_qos(hdr->frame_control)) { u8 *p = ieee80211_get_qos_ctl(hdr); - u8 ack_policy = 0, tid; + u8 ack_policy, tid; tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; - if (unlikely(local->wifi_wme_noack_test)) + /* preserve EOSP bit */ + ack_policy = *p & IEEE80211_QOS_CTL_EOSP; + + if (is_multicast_ether_addr(hdr->addr1) || + sdata->noack_map & BIT(tid)) { ack_policy |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK; - /* qos header is 2 bytes, second reserved */ + info->flags |= IEEE80211_TX_CTL_NO_ACK; + } + + /* qos header is 2 bytes */ *p++ = ack_policy | tid; - *p = 0; + *p = ieee80211_vif_is_mesh(&sdata->vif) ? + (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8) : 0; } } diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index faead6d02026..94edceb617ff 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -15,9 +15,13 @@ extern const int ieee802_1d_to_ac[8]; +u16 ieee80211_select_queue_80211(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_hdr *hdr); u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); -void ieee80211_set_qos_hdr(struct ieee80211_local *local, struct sk_buff *skb); +void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); u16 ieee80211_downgrade_queue(struct ieee80211_local *local, struct sk_buff *skb); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 380b9a7462b6..c6dd01a05291 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -94,7 +94,8 @@ static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len, /* frame sending functions */ -static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie, +static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, const u8 *ht_info_ie, struct ieee80211_supported_band *sband, struct ieee80211_channel *channel, enum ieee80211_smps_mode smps) @@ -102,8 +103,10 @@ static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie, struct ieee80211_ht_info *ht_info; u8 *pos; u32 flags = channel->flags; - u16 cap = sband->ht_cap.cap; - __le16 tmp; + u16 cap; + struct ieee80211_sta_ht_cap ht_cap; + + BUILD_BUG_ON(sizeof(ht_cap) != sizeof(sband->ht_cap)); if (!sband->ht_cap.ht_supported) return; @@ -114,9 +117,13 @@ static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie, if (ht_info_ie[1] < sizeof(struct ieee80211_ht_info)) return; + memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &ht_cap); + ht_info = (struct ieee80211_ht_info *)(ht_info_ie + 2); /* determine capability flags */ + cap = ht_cap.cap; switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: @@ -154,34 +161,8 @@ static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie, } /* reserve and fill IE */ - pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2); - *pos++ = WLAN_EID_HT_CAPABILITY; - *pos++ = sizeof(struct ieee80211_ht_cap); - memset(pos, 0, sizeof(struct ieee80211_ht_cap)); - - /* capability flags */ - tmp = cpu_to_le16(cap); - memcpy(pos, &tmp, sizeof(u16)); - pos += sizeof(u16); - - /* AMPDU parameters */ - *pos++ = sband->ht_cap.ampdu_factor | - (sband->ht_cap.ampdu_density << - IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT); - - /* MCS set */ - memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); - pos += sizeof(sband->ht_cap.mcs); - - /* extended capabilities */ - pos += sizeof(__le16); - - /* BF capabilities */ - pos += sizeof(__le32); - - /* antenna selection */ - pos += sizeof(u8); + ieee80211_ie_build_ht_cap(pos, &ht_cap, cap); } static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, @@ -229,11 +210,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, wk->ie_len + /* extra IEs */ 9, /* WMM */ GFP_KERNEL); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " - "frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); capab = WLAN_CAPABILITY_ESS; @@ -358,7 +337,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, if (wk->assoc.use_11n && wk->assoc.wmm_used && local->hw.queues >= 4) - ieee80211_add_ht_ie(skb, wk->assoc.ht_information_ie, + ieee80211_add_ht_ie(sdata, skb, wk->assoc.ht_information_ie, sband, wk->chan, wk->assoc.smps); /* if present, add any custom non-vendor IEs that go after HT */ @@ -460,7 +439,7 @@ ieee80211_direct_probe(struct ieee80211_work *wk) */ ieee80211_send_probe_req(sdata, NULL, wk->probe_auth.ssid, wk->probe_auth.ssid_len, NULL, 0, - (u32) -1, true); + (u32) -1, true, false); wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; run_again(local, wk->timeout); @@ -579,7 +558,7 @@ ieee80211_offchannel_tx(struct ieee80211_work *wk) /* * After this, offchan_tx.frame remains but now is no * longer a valid pointer -- we still need it as the - * cookie for canceling this work. + * cookie for canceling this work/status matching. */ ieee80211_tx_skb(wk->sdata, wk->offchan_tx.frame); @@ -883,44 +862,6 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, kfree_skb(skb); } -static bool ieee80211_work_ct_coexists(enum nl80211_channel_type wk_ct, - enum nl80211_channel_type oper_ct) -{ - switch (wk_ct) { - case NL80211_CHAN_NO_HT: - return true; - case NL80211_CHAN_HT20: - if (oper_ct != NL80211_CHAN_NO_HT) - return true; - return false; - case NL80211_CHAN_HT40MINUS: - case NL80211_CHAN_HT40PLUS: - return (wk_ct == oper_ct); - } - WARN_ON(1); /* shouldn't get here */ - return false; -} - -static enum nl80211_channel_type -ieee80211_calc_ct(enum nl80211_channel_type wk_ct, - enum nl80211_channel_type oper_ct) -{ - switch (wk_ct) { - case NL80211_CHAN_NO_HT: - return oper_ct; - case NL80211_CHAN_HT20: - if (oper_ct != NL80211_CHAN_NO_HT) - return oper_ct; - return wk_ct; - case NL80211_CHAN_HT40MINUS: - case NL80211_CHAN_HT40PLUS: - return wk_ct; - } - WARN_ON(1); /* shouldn't get here */ - return wk_ct; -} - - static void ieee80211_work_timer(unsigned long data) { struct ieee80211_local *local = (void *) data; @@ -971,51 +912,12 @@ static void ieee80211_work_work(struct work_struct *work) } if (!started && !local->tmp_channel) { - bool on_oper_chan; - bool tmp_chan_changed = false; - bool on_oper_chan2; - enum nl80211_channel_type wk_ct; - on_oper_chan = ieee80211_cfg_on_oper_channel(local); - - /* Work with existing channel type if possible. */ - wk_ct = wk->chan_type; - if (wk->chan == local->hw.conf.channel) - wk_ct = ieee80211_calc_ct(wk->chan_type, - local->hw.conf.channel_type); - - if (local->tmp_channel) - if ((local->tmp_channel != wk->chan) || - (local->tmp_channel_type != wk_ct)) - tmp_chan_changed = true; + ieee80211_offchannel_stop_vifs(local, true); local->tmp_channel = wk->chan; - local->tmp_channel_type = wk_ct; - /* - * Leave the station vifs in awake mode if they - * happen to be on the same channel as - * the requested channel. - */ - on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); - if (on_oper_chan != on_oper_chan2) { - if (on_oper_chan2) { - /* going off oper channel, PS too */ - ieee80211_offchannel_stop_vifs(local, - true); - ieee80211_hw_config(local, 0); - } else { - /* going on channel, but leave PS - * off-channel. */ - ieee80211_hw_config(local, 0); - ieee80211_offchannel_return(local, - true, - false); - } - } else if (tmp_chan_changed) - /* Still off-channel, but on some other - * channel, so update hardware. - * PS should already be off-channel. - */ - ieee80211_hw_config(local, 0); + local->tmp_channel_type = wk->chan_type; + + ieee80211_hw_config(local, 0); started = true; wk->timeout = jiffies; @@ -1084,35 +986,17 @@ static void ieee80211_work_work(struct work_struct *work) list_for_each_entry(wk, &local->work_list, list) { if (!wk->started) continue; - if (wk->chan != local->tmp_channel) - continue; - if (ieee80211_work_ct_coexists(wk->chan_type, - local->tmp_channel_type)) + if (wk->chan != local->tmp_channel || + wk->chan_type != local->tmp_channel_type) continue; remain_off_channel = true; } if (!remain_off_channel && local->tmp_channel) { - bool on_oper_chan = ieee80211_cfg_on_oper_channel(local); local->tmp_channel = NULL; - /* If tmp_channel wasn't operating channel, then - * we need to go back on-channel. - * NOTE: If we can ever be here while scannning, - * or if the hw_config() channel config logic changes, - * then we may need to do a more thorough check to see if - * we still need to do a hardware config. Currently, - * we cannot be here while scanning, however. - */ - if (ieee80211_cfg_on_oper_channel(local) && !on_oper_chan) - ieee80211_hw_config(local, 0); + ieee80211_hw_config(local, 0); - /* At the least, we need to disable offchannel_ps, - * so just go ahead and run the entire offchannel - * return logic here. We *could* skip enabling - * beaconing if we were already on-oper-channel - * as a future optimization. - */ - ieee80211_offchannel_return(local, true, true); + ieee80211_offchannel_return(local, true); /* give connection some time to breathe */ run_again(local, jiffies + HZ/2); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 7bc8702808fa..422b79851ec5 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -53,7 +53,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) } if (info->control.hw_key && - !(tx->flags & IEEE80211_TX_FRAGMENTED) && + (info->flags & IEEE80211_TX_CTL_DONTFRAG || + tx->local->ops->set_frag_threshold) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { /* hwaccel - with no need for SW-generated MMIC */ return TX_CONTINUE; @@ -105,7 +106,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_MMIC_ERROR) goto mic_fail; - if (!(status->flag & RX_FLAG_IV_STRIPPED)) + if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key) goto update_iv; return RX_CONTINUE; @@ -222,14 +223,14 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) ieee80211_tx_result ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx) { - struct sk_buff *skb = tx->skb; + struct sk_buff *skb; ieee80211_tx_set_protected(tx); - do { + skb_queue_walk(&tx->skbs, skb) { if (tkip_encrypt_skb(tx, skb) < 0) return TX_DROP; - } while ((skb = skb->next)); + } return TX_CONTINUE; } @@ -389,7 +390,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) u8 scratch[6 * AES_BLOCK_SIZE]; if (info->control.hw_key && - !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) { /* * hwaccel has no need for preallocated room for CCMP * header or MIC fields @@ -411,6 +413,12 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) pos = skb_push(skb, CCMP_HDR_LEN); memmove(pos, pos + CCMP_HDR_LEN, hdrlen); + + /* the HW only needs room for the IV, but not the actual IV */ + if (info->control.hw_key && + (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) + return 0; + hdr = (struct ieee80211_hdr *) pos; pos += hdrlen; @@ -441,14 +449,14 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) ieee80211_tx_result ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) { - struct sk_buff *skb = tx->skb; + struct sk_buff *skb; ieee80211_tx_set_protected(tx); - do { + skb_queue_walk(&tx->skbs, skb) { if (ccmp_encrypt_skb(tx, skb) < 0) return TX_DROP; - } while ((skb = skb->next)); + } return TX_CONTINUE; } @@ -546,15 +554,22 @@ static inline void bip_ipn_swap(u8 *d, const u8 *s) ieee80211_tx_result ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) { - struct sk_buff *skb = tx->skb; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct sk_buff *skb; + struct ieee80211_tx_info *info; struct ieee80211_key *key = tx->key; struct ieee80211_mmie *mmie; u8 aad[20]; u64 pn64; + if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) + return TX_DROP; + + skb = skb_peek(&tx->skbs); + + info = IEEE80211_SKB_CB(skb); + if (info->control.hw_key) - return 0; + return TX_CONTINUE; if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) return TX_DROP; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 32bff6d86cb2..f8ac4ef0b794 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -4,6 +4,14 @@ menu "Core Netfilter Configuration" config NETFILTER_NETLINK tristate +config NETFILTER_NETLINK_ACCT +tristate "Netfilter NFACCT over NFNETLINK interface" + depends on NETFILTER_ADVANCED + select NETFILTER_NETLINK + help + If this option is enabled, the kernel will include support + for extended accounting via NFNETLINK. + config NETFILTER_NETLINK_QUEUE tristate "Netfilter NFQUEUE over NFNETLINK interface" depends on NETFILTER_ADVANCED @@ -75,6 +83,16 @@ config NF_CONNTRACK_ZONES If unsure, say `N'. +config NF_CONNTRACK_PROCFS + bool "Supply CT list in procfs (OBSOLETE)" + default y + depends on PROC_FS + ---help--- + This option enables for the list of known conntrack entries + to be shown in procfs under net/netfilter/nf_conntrack. This + is considered obsolete in favor of using the conntrack(8) + tool which uses Netlink. + config NF_CONNTRACK_EVENTS bool "Connection tracking events" depends on NETFILTER_ADVANCED @@ -201,7 +219,6 @@ config NF_CONNTRACK_BROADCAST config NF_CONNTRACK_NETBIOS_NS tristate "NetBIOS name service protocol support" - depends on NETFILTER_ADVANCED select NF_CONNTRACK_BROADCAST help NetBIOS name service requests are sent as broadcast messages from an @@ -505,7 +522,7 @@ config NETFILTER_XT_TARGET_LED echo netfilter-ssh > /sys/class/leds/<ledname>/trigger For more information on the LEDs available on your system, see - Documentation/leds-class.txt + Documentation/leds/leds-class.txt config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' @@ -542,7 +559,6 @@ config NETFILTER_XT_TARGET_NOTRACK tristate '"NOTRACK" target support' depends on IP_NF_RAW || IP6_NF_RAW depends on NF_CONNTRACK - depends on NETFILTER_ADVANCED help The NOTRACK target allows a select rule to specify which packets *not* to enter the conntrack/NAT @@ -772,6 +788,15 @@ config NETFILTER_XT_MATCH_DSCP To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_ECN + tristate '"ecn" match support' + depends on NETFILTER_ADVANCED + ---help--- + This option adds an "ECN" match, which allows you to match against + the IPv4 and TCP header ECN fields. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_ESP tristate '"esp" match support' depends on NETFILTER_ADVANCED @@ -881,6 +906,16 @@ config NETFILTER_XT_MATCH_MULTIPORT To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_NFACCT + tristate '"nfacct" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_NETLINK_ACCT + help + This option allows you to use the extended accounting through + nfnetlink_acct. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_OSF tristate '"osf" Passive OS fingerprint match' depends on NETFILTER_ADVANCED && NETFILTER_NETLINK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1a02853df863..40f4c3d636c5 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o +obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o @@ -80,6 +81,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_ECN) += xt_ecn.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o @@ -90,6 +92,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o +obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 899b71c0ff5d..b4e8ff05b301 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -37,7 +37,7 @@ int nf_register_afinfo(const struct nf_afinfo *afinfo) err = mutex_lock_interruptible(&afinfo_mutex); if (err < 0) return err; - rcu_assign_pointer(nf_afinfo[afinfo->family], afinfo); + RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo); mutex_unlock(&afinfo_mutex); return 0; } @@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(nf_register_afinfo); void nf_unregister_afinfo(const struct nf_afinfo *afinfo) { mutex_lock(&afinfo_mutex); - rcu_assign_pointer(nf_afinfo[afinfo->family], NULL); + RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL); mutex_unlock(&afinfo_mutex); synchronize_rcu(); } @@ -54,6 +54,12 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo); struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); + +#if defined(CONFIG_JUMP_LABEL) +struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +EXPORT_SYMBOL(nf_hooks_needed); +#endif + static DEFINE_MUTEX(nf_hook_mutex); int nf_register_hook(struct nf_hook_ops *reg) @@ -70,6 +76,9 @@ int nf_register_hook(struct nf_hook_ops *reg) } list_add_rcu(®->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); +#if defined(CONFIG_JUMP_LABEL) + jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); +#endif return 0; } EXPORT_SYMBOL(nf_register_hook); @@ -79,7 +88,9 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); - +#if defined(CONFIG_JUMP_LABEL) + jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); +#endif synchronize_net(); } EXPORT_SYMBOL(nf_unregister_hook); @@ -180,17 +191,16 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - verdict >> NF_VERDICT_QBITS); - if (ret < 0) { - if (ret == -ECANCELED) + int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, + verdict >> NF_VERDICT_QBITS); + if (err < 0) { + if (err == -ECANCELED) goto next_hook; - if (ret == -ESRCH && + if (err == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) goto next_hook; kfree_skb(skb); } - ret = 0; } rcu_read_unlock(); return ret; @@ -219,7 +229,7 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) } EXPORT_SYMBOL(skb_make_writable); -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence manufactured ICMP or RST packets will not be associated with it. */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d7e86ef9d23a..86137b558f45 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1699,10 +1699,8 @@ ip_set_init(void) ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL); - if (!ip_set_list) { - pr_err("ip_set: Unable to create ip_set_list\n"); + if (!ip_set_list) return -ENOMEM; - } ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 757143b2240a..1f03556666f4 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -17,6 +17,7 @@ #include <net/ipv6.h> #include <linux/netfilter/ipset/ip_set_getport.h> +#include <linux/export.h> /* We must handle non-linear skbs */ static bool @@ -108,16 +109,18 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, } EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, __be16 *port, u8 *proto) { int protoff; u8 nexthdr; + __be16 frag_off; nexthdr = ipv6_hdr(skb)->nexthdr; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr); + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, + &frag_off); if (protoff < 0) return false; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index f2d576e6b769..4015fcaf87bc 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -241,7 +241,7 @@ hash_ip6_data_isnull(const struct hash_ip6_elem *elem) static inline void hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src) { - ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); + dst->ip.in6 = src->ip.in6; } static inline void diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 6ee10f5d59bd..37d667e3f6f8 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -158,7 +158,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem data = { }; - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index fb90e344e907..e69e2718fbe1 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -162,7 +162,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem data = { }; - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index deb3e3dfa5fc..64199b4e93c9 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -184,7 +184,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; - u32 ip, ip_to, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to; u32 ip2_from = 0, ip2_to, ip2_last, ip2; u32 timeout = h->timeout; bool with_ports = false; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 60d016541c58..28988196775e 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -267,7 +267,7 @@ static inline void hash_net6_data_copy(struct hash_net6_elem *dst, const struct hash_net6_elem *src) { - ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); + dst->ip.in6 = src->ip.in6; dst->cidr = src->cidr; } diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index bd13d66220f1..4f29fa97044b 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -1,3 +1,4 @@ +#include <linux/export.h> #include <linux/netfilter/ipset/pfxlen.h> /* diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 70bd1d0774c6..f9871385a65e 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -122,7 +122,6 @@ config IP_VS_RR config IP_VS_WRR tristate "weighted round-robin scheduling" - select GCD ---help--- The weighted robin-robin scheduling algorithm directs network connections to different real servers based on server weights @@ -232,6 +231,21 @@ config IP_VS_NQ If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. +comment 'IPVS SH scheduler' + +config IP_VS_SH_TAB_BITS + int "IPVS source hashing table size (the Nth power of 2)" + range 4 20 + default 8 + ---help--- + The source hashing scheduler maps source IPs to destinations + stored in a hash table. This table is tiled by each destination + until all slots in the table are filled. When using weights to + allow destinations to receive more connections, the table is + tiled an amount proportional to the weights specified. The table + needs to be large enough to effectively fit all the destinations + multiplied by their respective weights. + comment 'IPVS application helper' config IP_VS_FTP diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 12571fb2881c..29fa5badde75 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -616,7 +616,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) if ((cp) && (!cp->dest)) { dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, - cp->protocol, cp->fwmark); + cp->protocol, cp->fwmark, cp->flags); ip_vs_bind_dest(cp, dest); return dest; } else diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f77bb16d22a..611c3359b94d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -188,14 +188,13 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) } -static inline int +static inline void ip_vs_set_state(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { - if (unlikely(!pd->pp->state_transition)) - return 0; - return pd->pp->state_transition(cp, direction, skb, pd); + if (likely(pd->pp->state_transition)) + pd->pp->state_transition(cp, direction, skb, pd); } static inline int @@ -530,7 +529,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, a cache_bypass connection entry */ ipvs = net_ipvs(net); if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { - int ret, cs; + int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph.protocol == IPPROTO_UDP)? @@ -557,7 +556,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_in_stats(cp, skb); /* set state */ - cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); + ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); /* transmit the first SYN packet */ ret = cp->packet_xmit(skb, cp, pd->pp); @@ -984,7 +983,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, if (!cp) return NF_ACCEPT; - ipv6_addr_copy(&snet.in6, &iph->saddr); + snet.in6 = iph->saddr; return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, pp, offset, sizeof(struct ipv6hdr)); } @@ -1490,7 +1489,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; - int ret, restart, pkts; + int ret, pkts; struct netns_ipvs *ipvs; /* Already marked as IPVS request or reply? */ @@ -1591,7 +1590,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } ip_vs_in_stats(cp, skb); - restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); + ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) ret = cp->packet_xmit(skb, cp, pp); /* do not touch skb anymore */ @@ -1878,10 +1877,9 @@ static int __net_init __ip_vs_init(struct net *net) struct netns_ipvs *ipvs; ipvs = net_generic(net, ip_vs_net_id); - if (ipvs == NULL) { - pr_err("%s(): no memory.\n", __func__); + if (ipvs == NULL) return -ENOMEM; - } + /* Hold the beast until a service is registerd */ ipvs->enable = 0; ipvs->net = net; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e3be48bf4dcd..b3afe189af61 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -85,7 +85,7 @@ static int __ip_vs_addr_is_local_v6(struct net *net, }; rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); - if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) + if (rt && rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) return 1; return 0; @@ -619,15 +619,21 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, - __be16 vport, __u16 protocol, __u32 fwmark) + __be16 vport, __u16 protocol, __u32 fwmark, + __u32 flags) { struct ip_vs_dest *dest; struct ip_vs_service *svc; + __be16 port = dport; svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; - dest = ip_vs_lookup_dest(svc, daddr, dport); + if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) + port = 0; + dest = ip_vs_lookup_dest(svc, daddr, port); + if (!dest) + dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); if (dest) atomic_inc(&dest->refcnt); ip_vs_service_put(svc); @@ -856,15 +862,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, } dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL); - if (dest == NULL) { - pr_err("%s(): no memory.\n", __func__); + if (dest == NULL) return -ENOMEM; - } + dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!dest->stats.cpustats) { - pr_err("%s() alloc_percpu failed\n", __func__); + if (!dest->stats.cpustats) goto err_alloc; - } dest->af = svc->af; dest->protocol = svc->protocol; @@ -1168,10 +1171,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, goto out_err; } svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!svc->stats.cpustats) { - pr_err("%s() alloc_percpu failed\n", __func__); + if (!svc->stats.cpustats) goto out_err; - } /* I'm the first user of the service */ atomic_set(&svc->usecnt, 0); @@ -3326,10 +3327,8 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; struct net *net; - struct netns_ipvs *ipvs; net = skb_sknet(skb); - ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); @@ -3421,10 +3420,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) void *reply; int ret, cmd, reply_cmd; struct net *net; - struct netns_ipvs *ipvs; net = skb_sknet(skb); - ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3720,10 +3717,9 @@ int __net_init ip_vs_control_net_init(struct net *net) /* procfs stats */ ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!ipvs->tot_stats.cpustats) { - pr_err("%s(): alloc_percpu.\n", __func__); + if (!ipvs->tot_stats.cpustats) return -ENOMEM; - } + spin_lock_init(&ipvs->tot_stats.lock); proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 95fd0d14200b..1c269e56200a 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -150,10 +150,9 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc) /* allocate the DH table for this service */ tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, GFP_ATOMIC); - if (tbl == NULL) { - pr_err("%s(): no memory\n", __func__); + if (tbl == NULL) return -ENOMEM; - } + svc->sched_data = tbl; IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " "current service\n", diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 4490a32ad5b2..538d74ee4f68 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -52,8 +52,9 @@ * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper * First port is set to the default port. */ +static unsigned int ports_count = 1; static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0}; -module_param_array(ports, ushort, NULL, 0); +module_param_array(ports, ushort, &ports_count, 0444); MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands"); @@ -449,7 +450,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (ret) goto err_exit; - for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { + for (i = 0; i < ports_count; i++) { if (!ports[i]) continue; ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 87e40ea77a95..0f16283fd058 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -202,10 +202,8 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, en = ip_vs_lblc_get(dest->af, tbl, daddr); if (!en) { en = kmalloc(sizeof(*en), GFP_ATOMIC); - if (!en) { - pr_err("%s(): no memory\n", __func__); + if (!en) return NULL; - } en->af = dest->af; ip_vs_addr_copy(dest->af, &en->addr, daddr); @@ -345,10 +343,9 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) * Allocate the ip_vs_lblc_table for this service */ tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); - if (tbl == NULL) { - pr_err("%s(): no memory\n", __func__); + if (tbl == NULL) return -ENOMEM; - } + svc->sched_data = tbl; IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for " "current service\n", sizeof(*tbl)); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 90f618ab6dda..eec797f8cce7 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -112,10 +112,8 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) } e = kmalloc(sizeof(*e), GFP_ATOMIC); - if (e == NULL) { - pr_err("%s(): no memory\n", __func__); + if (e == NULL) return NULL; - } atomic_inc(&dest->refcnt); e->dest = dest; @@ -373,10 +371,8 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, en = ip_vs_lblcr_get(dest->af, tbl, daddr); if (!en) { en = kmalloc(sizeof(*en), GFP_ATOMIC); - if (!en) { - pr_err("%s(): no memory\n", __func__); + if (!en) return NULL; - } en->af = dest->af; ip_vs_addr_copy(dest->af, &en->addr, daddr); @@ -516,10 +512,9 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) * Allocate the ip_vs_lblcr_table for this service */ tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); - if (tbl == NULL) { - pr_err("%s(): no memory\n", __func__); + if (tbl == NULL) return -ENOMEM; - } + svc->sched_data = tbl; IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " "current service\n", sizeof(*tbl)); diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index f454c80df0a7..022e77e1e766 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -127,7 +127,7 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) nf_conntrack_alter_reply(ct, &new_tuple); } -int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +int ip_vs_confirm_conntrack(struct sk_buff *skb) { return nf_conntrack_confirm(skb); } diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 13d607ae9c52..1aa5cac748c4 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -108,7 +108,7 @@ static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p, struct ip_vs_conn *ct) { - bool ret = 0; + bool ret = false; if (ct->af == p->af && ip_vs_addr_equal(p->af, p->caddr, &ct->caddr) && @@ -121,7 +121,7 @@ static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p, ct->protocol == p->protocol && ct->pe_data && ct->pe_data_len == p->pe_data_len && !memcmp(ct->pe_data, p->pe_data, p->pe_data_len)) - ret = 1; + ret = true; IP_VS_DBG_BUF(9, "SIP template match %s %s->%s:%d %s\n", ip_vs_proto_name(p->protocol), diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 52d073c105e9..85312939695f 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -74,10 +74,9 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) struct ip_vs_proto_data *pd = kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); - if (!pd) { - pr_err("%s(): no memory.\n", __func__); + if (!pd) return -ENOMEM; - } + pd->pp = pp; /* For speed issues */ pd->next = ipvs->proto_data_table[hash]; ipvs->proto_data_table[hash] = pd; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index d12ed53ec95f..1fbf7a2816f5 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -906,7 +906,7 @@ static const char *sctp_state_name(int state) return "?"; } -static inline int +static inline void set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, const struct sk_buff *skb) { @@ -924,7 +924,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), sizeof(_sctpch), &_sctpch); if (sch == NULL) - return 0; + return; chunk_type = sch->type; /* @@ -993,21 +993,15 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, cp->timeout = pd->timeout_table[cp->state = next_state]; else /* What to do ? */ cp->timeout = sctp_timeouts[cp->state = next_state]; - - return 1; } -static int +static void sctp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { - int ret = 0; - spin_lock(&cp->lock); - ret = set_sctp_state(pd, cp, direction, skb); + set_sctp_state(pd, cp, direction, skb); spin_unlock(&cp->lock); - - return ret; } static inline __u16 sctp_app_hashkey(__be16 port) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index c0cc341b840d..ef8641f7af83 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -546,7 +546,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, /* * Handle state transitions */ -static int +static void tcp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) @@ -561,13 +561,11 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph); if (th == NULL) - return 0; + return; spin_lock(&cp->lock); set_tcp_state(pd, cp, direction, th); spin_unlock(&cp->lock); - - return 1; } static inline __u16 tcp_app_hashkey(__be16 port) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index f1282cbe6fe3..f4b7262896bb 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -454,18 +454,17 @@ static const char * udp_state_name(int state) return udp_state_name_table[state] ? udp_state_name_table[state] : "?"; } -static int +static void udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_proto_data *pd) { if (unlikely(!pd)) { pr_err("UDP no ns data\n"); - return 0; + return; } cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; - return 1; } static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index b5e2556c581a..069e8d4d5c01 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -30,6 +30,11 @@ * server is dead or overloaded, the load balancer can bypass the cache * server and send requests to the original server directly. * + * The weight destination attribute can be used to control the + * distribution of connections to the destinations in servernode. The + * greater the weight, the more connections the destination + * will receive. + * */ #define KMSG_COMPONENT "IPVS" @@ -99,9 +104,11 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) struct ip_vs_sh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; + int d_count; b = tbl; p = &svc->destinations; + d_count = 0; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { if (list_empty(p)) { b->dest = NULL; @@ -113,7 +120,16 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) atomic_inc(&dest->refcnt); b->dest = dest; - p = p->next; + IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", + i, IP_VS_DBG_ADDR(svc->af, &dest->addr), + atomic_read(&dest->weight)); + + /* Don't move to next dest until filling weight */ + if (++d_count >= atomic_read(&dest->weight)) { + p = p->next; + d_count = 0; + } + } b++; } @@ -147,10 +163,9 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc) /* allocate the SH table for this service */ tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, GFP_ATOMIC); - if (tbl == NULL) { - pr_err("%s(): no memory\n", __func__); + if (tbl == NULL) return -ENOMEM; - } + svc->sched_data = tbl; IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " "current service\n", diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3cdd479f9b5d..8a0d6d6889f0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -603,9 +603,9 @@ sloop: #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) { p += sizeof(struct ip_vs_sync_v6); - ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6); - ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6); - ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6); + s->v6.caddr = cp->caddr.in6; + s->v6.vaddr = cp->vaddr.in6; + s->v6.daddr = cp->daddr.in6; } else #endif { @@ -740,7 +740,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, * but still handled. */ dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, - param->vport, protocol, fwmark); + param->vport, protocol, fwmark, flags); /* Set the approprite ativity flag */ if (protocol == IPPROTO_TCP) { diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 1ef41f50723c..fd0d4e09876a 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -85,10 +85,9 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) * Allocate the mark variable for WRR scheduling */ mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC); - if (mark == NULL) { - pr_err("%s(): no memory\n", __func__); + if (mark == NULL) return -ENOMEM; - } + mark->cl = &svc->destinations; mark->cw = 0; mark->mw = ip_vs_wrr_max_weight(svc); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index ee319a4338b0..7fd66dec859d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -207,7 +207,7 @@ __ip_vs_reroute_locally(struct sk_buff *skb) static inline int __ip_vs_is_local_route6(struct rt6_info *rt) { - return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK; + return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; } static struct dst_entry * @@ -235,7 +235,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, goto out_err; } } - ipv6_addr_copy(ret_saddr, &fl6.saddr); + *ret_saddr = fl6.saddr; return dst; out_err: @@ -279,7 +279,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, atomic_read(&rt->dst.__refcnt)); } if (ret_saddr) - ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6); + *ret_saddr = dest->dst_saddr.in6; spin_unlock(&dest->dst_lock); } else { dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); @@ -339,7 +339,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) \ (skb)->ipvs_property = 1; \ if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ - __ret = ip_vs_confirm_conntrack(skb, cp); \ + __ret = ip_vs_confirm_conntrack(skb); \ if (__ret == NF_ACCEPT) { \ nf_reset(skb); \ skb_forward_csum(skb); \ @@ -541,7 +541,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); @@ -658,7 +658,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); @@ -705,7 +705,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error; - ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6); + ipv6_hdr(skb)->daddr = cp->daddr.in6; if (!local || !skb->dev) { /* drop the old route when skb is not shared */ @@ -967,8 +967,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); - ipv6_addr_copy(&iph->daddr, &cp->daddr.in6); - ipv6_addr_copy(&iph->saddr, &saddr); + iph->daddr = cp->daddr.in6; + iph->saddr = saddr; iph->hop_limit = old_iph->hop_limit; /* Another hack: avoid icmp_send in ip_fragment */ @@ -1173,7 +1173,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); @@ -1293,7 +1293,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 5178c691ecbf..f4f8cda05986 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -12,12 +12,13 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/moduleparam.h> +#include <linux/export.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> -static int nf_ct_acct __read_mostly; +static bool nf_ct_acct __read_mostly; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); @@ -45,8 +46,8 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) return 0; return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)acct[dir].packets, - (unsigned long long)acct[dir].bytes); + (unsigned long long)atomic64_read(&acct[dir].packets), + (unsigned long long)atomic64_read(&acct[dir].bytes)); }; EXPORT_SYMBOL_GPL(seq_print_acct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f7af8b866017..76613f5a55c0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -67,6 +67,7 @@ DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); unsigned int nf_conntrack_hash_rnd __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd); static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) { @@ -661,7 +662,6 @@ __nf_conntrack_alloc(struct net *net, u16 zone, */ ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); if (ct == NULL) { - pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); } @@ -749,10 +749,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, hash); - if (IS_ERR(ct)) { - pr_debug("Can't allocate conntrack.\n"); + if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; - } if (!l4proto->new(ct, skb, dataoff)) { nf_conntrack_free(ct); @@ -1047,10 +1045,8 @@ acct: acct = nf_conn_acct_find(ct); if (acct) { - spin_lock_bh(&ct->lock); - acct[CTINFO2DIR(ctinfo)].packets++; - acct[CTINFO2DIR(ctinfo)].bytes += skb->len; - spin_unlock_bh(&ct->lock); + atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes); } } } @@ -1066,11 +1062,9 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, acct = nf_conn_acct_find(ct); if (acct) { - spin_lock_bh(&ct->lock); - acct[CTINFO2DIR(ctinfo)].packets++; - acct[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); - spin_unlock_bh(&ct->lock); + atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len - skb_network_offset(skb), + &acct[CTINFO2DIR(ctinfo)].bytes); } } @@ -1090,7 +1084,7 @@ static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { }; #endif -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -1317,7 +1311,7 @@ static void nf_conntrack_cleanup_net(struct net *net) void nf_conntrack_cleanup(struct net *net) { if (net_eq(net, &init_net)) - rcu_assign_pointer(ip_ct_attach, NULL); + RCU_INIT_POINTER(ip_ct_attach, NULL); /* This makes sure all current packets have passed through netfilter framework. Roll on, two-stage module @@ -1327,7 +1321,7 @@ void nf_conntrack_cleanup(struct net *net) nf_conntrack_cleanup_net(net); if (net_eq(net, &init_net)) { - rcu_assign_pointer(nf_ct_destroy, NULL); + RCU_INIT_POINTER(nf_ct_destroy, NULL); nf_conntrack_cleanup_init_net(); } } @@ -1345,8 +1339,7 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) get_order(sz)); if (!hash) { printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); - hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL); + hash = vzalloc(sz); } if (hash && nulls) @@ -1576,11 +1569,11 @@ int nf_conntrack_init(struct net *net) if (net_eq(net, &init_net)) { /* For use by REJECT target */ - rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); - rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); + RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); + RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); /* Howto get NAT offsets */ - rcu_assign_pointer(nf_ct_nat_offset, NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); } return 0; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 63a1b915a7e4..14af6329bdda 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -26,22 +27,17 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); -struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); - -struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; -EXPORT_SYMBOL_GPL(nf_expect_event_cb); - /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ void nf_ct_deliver_cached_events(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); unsigned long events; struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; rcu_read_lock(); - notify = rcu_dereference(nf_conntrack_event_cb); + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (notify == NULL) goto out_unlock; @@ -82,19 +78,20 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); -int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) +int nf_conntrack_register_notifier(struct net *net, + struct nf_ct_event_notifier *new) { int ret = 0; struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(nf_conntrack_event_cb, + notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); if (notify != NULL) { ret = -EBUSY; goto out_unlock; } - rcu_assign_pointer(nf_conntrack_event_cb, new); + rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; @@ -104,32 +101,34 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) +void nf_conntrack_unregister_notifier(struct net *net, + struct nf_ct_event_notifier *new) { struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(nf_conntrack_event_cb, + notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); BUG_ON(notify != new); - rcu_assign_pointer(nf_conntrack_event_cb, NULL); + RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) +int nf_ct_expect_register_notifier(struct net *net, + struct nf_exp_event_notifier *new) { int ret = 0; struct nf_exp_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(nf_expect_event_cb, + notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); if (notify != NULL) { ret = -EBUSY; goto out_unlock; } - rcu_assign_pointer(nf_expect_event_cb, new); + rcu_assign_pointer(net->ct.nf_expect_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; @@ -139,15 +138,16 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) +void nf_ct_expect_unregister_notifier(struct net *net, + struct nf_exp_event_notifier *new) { struct nf_exp_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(nf_expect_event_cb, + notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); BUG_ON(notify != new); - rcu_assign_pointer(nf_expect_event_cb, NULL); + RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index cd1e8e0970f2..4147ba3f653c 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -20,6 +20,8 @@ #include <linux/percpu.h> #include <linux/kernel.h> #include <linux/jhash.h> +#include <linux/moduleparam.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/netfilter/nf_conntrack.h> @@ -36,8 +38,6 @@ unsigned int nf_ct_expect_max __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; -static HLIST_HEAD(nf_ct_userspace_expect_list); - /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, u32 pid, int report) @@ -45,14 +45,14 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); + NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); net->ct.expect_count--; hlist_del(&exp->lnode); - if (!(exp->flags & NF_CT_EXPECT_USERSPACE)) - master_help->expecting[exp->class]--; + master_help->expecting[exp->class]--; nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); nf_ct_expect_put(exp); @@ -312,37 +312,34 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp) } EXPORT_SYMBOL_GPL(nf_ct_expect_put); -static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) +static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(exp); - const struct nf_conntrack_expect_policy *p; unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); /* two references : one for hash insert, one for the timer */ atomic_add(2, &exp->use); - if (master_help) { - hlist_add_head(&exp->lnode, &master_help->expectations); - master_help->expecting[exp->class]++; - } else if (exp->flags & NF_CT_EXPECT_USERSPACE) - hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list); + hlist_add_head(&exp->lnode, &master_help->expectations); + master_help->expecting[exp->class]++; hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); - if (master_help) { - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[exp->class]; - exp->timeout.expires = jiffies + p->timeout * HZ; + helper = rcu_dereference_protected(master_help->helper, + lockdep_is_held(&nf_conntrack_lock)); + if (helper) { + exp->timeout.expires = jiffies + + helper->expect_policy[exp->class].timeout * HZ; } add_timer(&exp->timeout); NF_CT_STAT_INC(net, expect_create); + return 0; } /* Race with expectations being used means we could have none to find; OK. */ @@ -387,14 +384,13 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; int ret = 1; - /* Don't allow expectations created from kernel-space with no helper */ - if (!(expect->flags & NF_CT_EXPECT_USERSPACE) && - (!master_help || (master_help && !master_help->helper))) { + if (!master_help) { ret = -ESHUTDOWN; goto out; } @@ -412,11 +408,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) } } /* Will be over limit? */ - if (master_help) { - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[expect->class]; + helper = rcu_dereference_protected(master_help->helper, + lockdep_is_held(&nf_conntrack_lock)); + if (helper) { + p = &helper->expect_policy[expect->class]; if (p->max_expected && master_help->expecting[expect->class] >= p->max_expected) { evict_oldest_expect(master, expect); @@ -448,8 +443,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, if (ret <= 0) goto out; - ret = 0; - nf_ct_expect_insert(expect); + ret = nf_ct_expect_insert(expect); + if (ret < 0) + goto out; spin_unlock_bh(&nf_conntrack_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); return ret; @@ -459,22 +455,7 @@ out: } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); -void nf_ct_remove_userspace_expectations(void) -{ - struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; - - hlist_for_each_entry_safe(exp, n, next, - &nf_ct_userspace_expect_list, lnode) { - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); - } - } -} -EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations); - -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_NF_CONNTRACK_PROCFS struct ct_expect_iter_state { struct seq_net_private p; unsigned int bucket; @@ -602,25 +583,25 @@ static const struct file_operations exp_file_ops = { .llseek = seq_lseek, .release = seq_release_net, }; -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NF_CONNTRACK_PROCFS */ static int exp_proc_init(struct net *net) { -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_NF_CONNTRACK_PROCFS struct proc_dir_entry *proc; proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops); if (!proc) return -ENOMEM; -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NF_CONNTRACK_PROCFS */ return 0; } static void exp_proc_remove(struct net *net) { -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_NF_CONNTRACK_PROCFS proc_net_remove(net, "nf_conntrack_expect"); -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NF_CONNTRACK_PROCFS */ } module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 05ecdc281a53..641ff5f96718 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(nf_ct_extend_register); void nf_ct_extend_unregister(struct nf_ct_ext_type *type) { mutex_lock(&nf_ct_ext_type_mutex); - rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); + RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 6f5801eac999..8c5c95c6d34f 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -42,7 +42,7 @@ static u_int16_t ports[MAX_PORTS]; static unsigned int ports_c; module_param_array(ports, ushort, &ports_c, 0400); -static int loose; +static bool loose; module_param(loose, bool, 0600); unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index f03c2d4539f6..722291f8af72 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -42,7 +42,7 @@ static int gkrouted_only __read_mostly = 1; module_param(gkrouted_only, int, 0600); MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper"); -static int callforward_filter __read_mostly = 1; +static bool callforward_filter __read_mostly = true; module_param(callforward_filter, bool, 0600); MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " "if both endpoints are on different sides " @@ -743,17 +743,16 @@ static int callforward_do_filter(const union nf_inet_addr *src, } break; } -#if defined(CONFIG_NF_CONNTRACK_IPV6) || \ - defined(CONFIG_NF_CONNTRACK_IPV6_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) case AF_INET6: { struct flowi6 fl1, fl2; struct rt6_info *rt1, *rt2; memset(&fl1, 0, sizeof(fl1)); - ipv6_addr_copy(&fl1.daddr, &src->in6); + fl1.daddr = src->in6; memset(&fl2, 0, sizeof(fl2)); - ipv6_addr_copy(&fl2.daddr, &dst->in6); + fl2.daddr = dst->in6; if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, flowi6_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 1bdfea357955..299fec91f741 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -121,6 +121,18 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, int ret = 0; if (tmpl != NULL) { + /* we've got a userspace helper. */ + if (tmpl->status & IPS_USERSPACE_HELPER) { + help = nf_ct_helper_ext_add(ct, flags); + if (help == NULL) { + ret = -ENOMEM; + goto out; + } + rcu_assign_pointer(help->helper, NULL); + __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status); + ret = 0; + goto out; + } help = nfct_help(tmpl); if (help != NULL) helper = help->helper; @@ -131,7 +143,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); if (helper == NULL) { if (help) - rcu_assign_pointer(help->helper, NULL); + RCU_INIT_POINTER(help->helper, NULL); goto out; } @@ -162,7 +174,7 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, lockdep_is_held(&nf_conntrack_lock) ) == me) { nf_conntrack_event(IPCT_HELPER, ct); - rcu_assign_pointer(help->helper, NULL); + RCU_INIT_POINTER(help->helper, NULL); } return 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7dec88a1755b..2a4834b83332 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -4,7 +4,7 @@ * (C) 2001 by Jay Schulist <jschlst@samba.org> * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org> * (C) 2003 by Patrick Mchardy <kaber@trash.net> - * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2005-2011 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial connection tracking via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -135,7 +135,7 @@ nla_put_failure: static inline int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) { - long timeout = (ct->timeout.expires - jiffies) / HZ; + long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ; if (timeout < 0) timeout = 0; @@ -203,25 +203,18 @@ nla_put_failure: } static int -ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, - enum ip_conntrack_dir dir) +dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes, + enum ip_conntrack_dir dir) { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nlattr *nest_count; - const struct nf_conn_counter *acct; - - acct = nf_conn_acct_find(ct); - if (!acct) - return 0; nest_count = nla_nest_start(skb, type | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; - NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, - cpu_to_be64(acct[dir].packets)); - NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, - cpu_to_be64(acct[dir].bytes)); + NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts)); + NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes)); nla_nest_end(skb, nest_count); @@ -232,6 +225,27 @@ nla_put_failure: } static int +ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, + enum ip_conntrack_dir dir, int type) +{ + struct nf_conn_counter *acct; + u64 pkts, bytes; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + + if (type == IPCTNL_MSG_CT_GET_CTRZERO) { + pkts = atomic64_xchg(&acct[dir].packets, 0); + bytes = atomic64_xchg(&acct[dir].bytes, 0); + } else { + pkts = atomic64_read(&acct[dir].packets); + bytes = atomic64_read(&acct[dir].bytes); + } + return dump_counters(skb, pkts, bytes, dir); +} + +static int ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_count; @@ -393,15 +407,15 @@ nla_put_failure: } static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, struct nf_conn *ct) +ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = pid ? NLM_F_MULTI : 0, event; - event |= NFNL_SUBSYS_CTNETLINK << 8; + event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -430,8 +444,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || @@ -612,8 +626,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (events & (1 << IPCT_DESTROY)) { - if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + if (ctnetlink_dump_counters(skb, ct, + IP_CT_DIR_ORIGINAL, type) < 0 || + ctnetlink_dump_counters(skb, ct, + IP_CT_DIR_REPLY, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; } else { @@ -709,20 +725,13 @@ restart: } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, ct) < 0) { + NFNL_MSG_TYPE( + cb->nlh->nlmsg_type), + ct) < 0) { nf_conntrack_get(&ct->ct_general); cb->args[1] = (unsigned long)ct; goto out; } - - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) { - struct nf_conn_counter *acct; - - acct = nf_conn_acct_find(ct); - if (acct) - memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); - } } if (cb->args[1]) { cb->args[1] = 0; @@ -1001,7 +1010,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, rcu_read_lock(); err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, ct); + NFNL_MSG_TYPE(nlh->nlmsg_type), ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) @@ -1087,14 +1096,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[]) if (cda[CTA_NAT_DST]) { ret = ctnetlink_parse_nat_setup(ct, - IP_NAT_MANIP_DST, + NF_NAT_MANIP_DST, cda[CTA_NAT_DST]); if (ret < 0) return ret; } if (cda[CTA_NAT_SRC]) { ret = ctnetlink_parse_nat_setup(ct, - IP_NAT_MANIP_SRC, + NF_NAT_MANIP_SRC, cda[CTA_NAT_SRC]); if (ret < 0) return ret; @@ -1125,7 +1134,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) if (help && help->helper) { /* we had a helper before ... */ nf_ct_remove_expectations(ct); - rcu_assign_pointer(help->helper, NULL); + RCU_INIT_POINTER(help->helper, NULL); } return 0; @@ -1358,12 +1367,15 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); + spin_unlock_bh(&nf_conntrack_lock); #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { + spin_lock_bh(&nf_conntrack_lock); err = -EOPNOTSUPP; goto err1; } + spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), @@ -1386,7 +1398,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } /* not in hash table yet so not strictly necessary */ - rcu_assign_pointer(help->helper, helper); + RCU_INIT_POINTER(help->helper, helper); } } else { /* try an implicit helper assignation */ @@ -1638,7 +1650,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) { struct nf_conn *master = exp->master; - long timeout = (exp->timeout.expires - jiffies) / HZ; + long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ; struct nf_conn_help *help; if (timeout < 0) @@ -1847,7 +1859,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - if (cda[CTA_EXPECT_MASTER]) + if (cda[CTA_EXPECT_TUPLE]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + else if (cda[CTA_EXPECT_MASTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); else return -EINVAL; @@ -1869,25 +1883,30 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, err = -ENOMEM; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (skb2 == NULL) { + nf_ct_expect_put(exp); goto out; + } rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); + nf_ct_expect_put(exp); if (err <= 0) goto free; - nf_ct_expect_put(exp); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto out; - return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + return 0; free: kfree_skb(skb2); out: - nf_ct_expect_put(exp); - return err; + /* this avoids a loop in nfnetlink. */ + return err == -EAGAIN ? -ENOBUFS : err; } static int @@ -2023,6 +2042,10 @@ ctnetlink_create_expect(struct net *net, u16 zone, } help = nfct_help(ct); if (!help) { + err = -EOPNOTSUPP; + goto out; + } + if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) { if (!cda[CTA_EXPECT_TIMEOUT]) { err = -EINVAL; goto out; @@ -2163,6 +2186,54 @@ MODULE_ALIAS("ip_conntrack_netlink"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); +static int __net_init ctnetlink_net_init(struct net *net) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + int ret; + + ret = nf_conntrack_register_notifier(net, &ctnl_notifier); + if (ret < 0) { + pr_err("ctnetlink_init: cannot register notifier.\n"); + goto err_out; + } + + ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); + if (ret < 0) { + pr_err("ctnetlink_init: cannot expect register notifier.\n"); + goto err_unreg_notifier; + } +#endif + return 0; + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +err_unreg_notifier: + nf_conntrack_unregister_notifier(net, &ctnl_notifier); +err_out: + return ret; +#endif +} + +static void ctnetlink_net_exit(struct net *net) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS + nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); + nf_conntrack_unregister_notifier(net, &ctnl_notifier); +#endif +} + +static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) +{ + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) + ctnetlink_net_exit(net); +} + +static struct pernet_operations ctnetlink_net_ops = { + .init = ctnetlink_net_init, + .exit_batch = ctnetlink_net_exit_batch, +}; + static int __init ctnetlink_init(void) { int ret; @@ -2180,28 +2251,15 @@ static int __init ctnetlink_init(void) goto err_unreg_subsys; } -#ifdef CONFIG_NF_CONNTRACK_EVENTS - ret = nf_conntrack_register_notifier(&ctnl_notifier); - if (ret < 0) { - pr_err("ctnetlink_init: cannot register notifier.\n"); + if (register_pernet_subsys(&ctnetlink_net_ops)) { + pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } - ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp); - if (ret < 0) { - pr_err("ctnetlink_init: cannot expect register notifier.\n"); - goto err_unreg_notifier; - } -#endif - return 0; -#ifdef CONFIG_NF_CONNTRACK_EVENTS -err_unreg_notifier: - nf_conntrack_unregister_notifier(&ctnl_notifier); err_unreg_exp_subsys: nfnetlink_subsys_unregister(&ctnl_exp_subsys); -#endif err_unreg_subsys: nfnetlink_subsys_unregister(&ctnl_subsys); err_out: @@ -2212,12 +2270,7 @@ static void __exit ctnetlink_exit(void) { pr_info("ctnetlink: unregistering from nfnetlink.\n"); - nf_ct_remove_userspace_expectations(); -#ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); - nf_conntrack_unregister_notifier(&ctnl_notifier); -#endif - + unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 2e664a69d7db..d6dde6dc09e6 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -629,7 +629,7 @@ static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct) { @@ -770,7 +770,7 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { .error = dccp_error, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = dccp_to_nlattr, .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, @@ -792,7 +792,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .error = dccp_error, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = dccp_to_nlattr, .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index d69facdd9a7a..f0338791b822 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -291,7 +291,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .new = gre_new, .destroy = gre_destroy, .me = THIS_MODULE, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6772b1154654..afa69136061a 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -461,7 +461,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -666,7 +666,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .packet = sctp_packet, .new = sctp_new, .me = THIS_MODULE, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, .nlattr_size = sctp_nlattr_size, .from_nlattr = nlattr_to_sctp, @@ -696,7 +696,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .packet = sctp_packet, .new = sctp_new, .me = THIS_MODULE, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, .nlattr_size = sctp_nlattr_size, .from_nlattr = nlattr_to_sctp, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 8235b86b4e87..97b9f3ebf28c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1126,7 +1126,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -1447,7 +1447,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .packet = tcp_packet, .new = tcp_new, .error = tcp_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, .nlattr_size = tcp_nlattr_size, .from_nlattr = nlattr_to_tcp, @@ -1479,7 +1479,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .packet = tcp_packet, .new = tcp_new, .error = tcp_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, .nlattr_size = tcp_nlattr_size, .from_nlattr = nlattr_to_tcp, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8289088b8218..5f35757fbff0 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -188,7 +188,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .packet = udp_packet, .new = udp_new, .error = udp_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, @@ -216,7 +216,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .packet = udp_packet, .new = udp_new, .error = udp_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 263b5a72588d..f52ca1181013 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -174,7 +174,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .packet = udplite_packet, .new = udplite_new, .error = udplite_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, @@ -198,7 +198,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .packet = udplite_packet, .new = udplite_new, .error = udplite_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 05e9feb101c3..885f5ab9bc28 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -34,7 +34,7 @@ MODULE_LICENSE("GPL"); -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_NF_CONNTRACK_PROCFS int print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, @@ -396,7 +396,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net) static void nf_conntrack_standalone_fini_proc(struct net *net) { } -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NF_CONNTRACK_PROCFS */ /* Sysctl support */ diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index af7dd31af0a1..e8d27afbbdb9 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -15,7 +15,7 @@ #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_timestamp.h> -static int nf_ct_tstamp __read_mostly; +static bool nf_ct_tstamp __read_mostly; module_param_named(tstamp, nf_ct_tstamp, bool, 0644); MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 20714edf6cd2..957374a234d4 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -74,7 +74,7 @@ void nf_log_unregister(struct nf_logger *logger) c_logger = rcu_dereference_protected(nf_loggers[i], lockdep_is_held(&nf_log_mutex)); if (c_logger == logger) - rcu_assign_pointer(nf_loggers[i], NULL); + RCU_INIT_POINTER(nf_loggers[i], NULL); list_del(&logger->list[i]); } mutex_unlock(&nf_log_mutex); @@ -103,7 +103,7 @@ void nf_log_unbind_pf(u_int8_t pf) if (pf >= ARRAY_SIZE(nf_loggers)) return; mutex_lock(&nf_log_mutex); - rcu_assign_pointer(nf_loggers[pf], NULL); + RCU_INIT_POINTER(nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unbind_pf); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 84d0fd47636a..b3a7db678b8d 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -65,7 +65,7 @@ int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) return -EINVAL; } - rcu_assign_pointer(queue_handler[pf], NULL); + RCU_INIT_POINTER(queue_handler[pf], NULL); mutex_unlock(&queue_handler_mutex); synchronize_rcu(); @@ -84,7 +84,7 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) queue_handler[pf], lockdep_is_held(&queue_handler_mutex) ) == qh) - rcu_assign_pointer(queue_handler[pf], NULL); + RCU_INIT_POINTER(queue_handler[pf], NULL); } mutex_unlock(&queue_handler_mutex); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e6c2b8f32180..4d70785b953d 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -219,7 +219,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) struct net *net; list_for_each_entry(net, net_exit_list, exit_list) - rcu_assign_pointer(net->nfnl, NULL); + RCU_INIT_POINTER(net->nfnl, NULL); synchronize_net(); list_for_each_entry(net, net_exit_list, exit_list) netlink_kernel_release(net->nfnl_stash); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c new file mode 100644 index 000000000000..11ba013e47f6 --- /dev/null +++ b/net/netfilter/nfnetlink_acct.c @@ -0,0 +1,361 @@ +/* + * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 Intra2net AG <http://www.intra2net.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/rculist.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <net/netlink.h> +#include <net/sock.h> +#include <asm/atomic.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_acct.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure"); + +static LIST_HEAD(nfnl_acct_list); + +struct nf_acct { + atomic64_t pkts; + atomic64_t bytes; + struct list_head head; + atomic_t refcnt; + char name[NFACCT_NAME_MAX]; + struct rcu_head rcu_head; +}; + +static int +nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + struct nf_acct *nfacct, *matching = NULL; + char *acct_name; + + if (!tb[NFACCT_NAME]) + return -EINVAL; + + acct_name = nla_data(tb[NFACCT_NAME]); + + list_for_each_entry(nfacct, &nfnl_acct_list, head) { + if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) + continue; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + matching = nfacct; + break; + } + + if (matching) { + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + /* reset counters if you request a replacement. */ + atomic64_set(&matching->pkts, 0); + atomic64_set(&matching->bytes, 0); + return 0; + } + return -EBUSY; + } + + nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); + if (nfacct == NULL) + return -ENOMEM; + + strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); + + if (tb[NFACCT_BYTES]) { + atomic64_set(&nfacct->bytes, + be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]))); + } + if (tb[NFACCT_PKTS]) { + atomic64_set(&nfacct->pkts, + be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]))); + } + atomic_set(&nfacct->refcnt, 1); + list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); + return 0; +} + +static int +nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + int event, struct nf_acct *acct) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0; + u64 pkts, bytes; + + event |= NFNL_SUBSYS_ACCT << 8; + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + NLA_PUT_STRING(skb, NFACCT_NAME, acct->name); + + if (type == NFNL_MSG_ACCT_GET_CTRZERO) { + pkts = atomic64_xchg(&acct->pkts, 0); + bytes = atomic64_xchg(&acct->bytes, 0); + } else { + pkts = atomic64_read(&acct->pkts); + bytes = atomic64_read(&acct->bytes); + } + NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts)); + NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes)); + NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))); + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_acct *cur, *last; + + if (cb->args[2]) + return 0; + + last = (struct nf_acct *)cb->args[1]; + if (cb->args[1]) + cb->args[1] = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + if (last && cur != last) + continue; + + if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + NFNL_MSG_ACCT_NEW, cur) < 0) { + cb->args[1] = (unsigned long)cur; + break; + } + } + if (!cb->args[1]) + cb->args[2] = 1; + rcu_read_unlock(); + return skb->len; +} + +static int +nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = -ENOENT; + struct nf_acct *cur; + char *acct_name; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump, + NULL, 0); + } + + if (!tb[NFACCT_NAME]) + return -EINVAL; + acct_name = nla_data(tb[NFACCT_NAME]); + + list_for_each_entry(cur, &nfnl_acct_list, head) { + struct sk_buff *skb2; + + if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) + continue; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } + + ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_ACCT_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; + } + return ret; +} + +/* try to delete object, fail if it is still in use. */ +static int nfnl_acct_try_del(struct nf_acct *cur) +{ + int ret = 0; + + /* we want to avoid races with nfnl_acct_find_get. */ + if (atomic_dec_and_test(&cur->refcnt)) { + /* We are protected by nfnl mutex. */ + list_del_rcu(&cur->head); + kfree_rcu(cur, rcu_head); + } else { + /* still in use, restore reference counter. */ + atomic_inc(&cur->refcnt); + ret = -EBUSY; + } + return ret; +} + +static int +nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + char *acct_name; + struct nf_acct *cur; + int ret = -ENOENT; + + if (!tb[NFACCT_NAME]) { + list_for_each_entry(cur, &nfnl_acct_list, head) + nfnl_acct_try_del(cur); + + return 0; + } + acct_name = nla_data(tb[NFACCT_NAME]); + + list_for_each_entry(cur, &nfnl_acct_list, head) { + if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0) + continue; + + ret = nfnl_acct_try_del(cur); + if (ret < 0) + return ret; + + break; + } + return ret; +} + +static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { + [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, + [NFACCT_BYTES] = { .type = NLA_U64 }, + [NFACCT_PKTS] = { .type = NLA_U64 }, +}; + +static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { + [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, +}; + +static const struct nfnetlink_subsystem nfnl_acct_subsys = { + .name = "acct", + .subsys_id = NFNL_SUBSYS_ACCT, + .cb_count = NFNL_MSG_ACCT_MAX, + .cb = nfnl_acct_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT); + +struct nf_acct *nfnl_acct_find_get(const char *acct_name) +{ + struct nf_acct *cur, *acct = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) + continue; + + if (!try_module_get(THIS_MODULE)) + goto err; + + if (!atomic_inc_not_zero(&cur->refcnt)) { + module_put(THIS_MODULE); + goto err; + } + + acct = cur; + break; + } +err: + rcu_read_unlock(); + return acct; +} +EXPORT_SYMBOL_GPL(nfnl_acct_find_get); + +void nfnl_acct_put(struct nf_acct *acct) +{ + atomic_dec(&acct->refcnt); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(nfnl_acct_put); + +void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) +{ + atomic64_inc(&nfacct->pkts); + atomic64_add(skb->len, &nfacct->bytes); +} +EXPORT_SYMBOL_GPL(nfnl_acct_update); + +static int __init nfnl_acct_init(void) +{ + int ret; + + pr_info("nfnl_acct: registering with nfnetlink.\n"); + ret = nfnetlink_subsys_register(&nfnl_acct_subsys); + if (ret < 0) { + pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); + goto err_out; + } + return 0; +err_out: + return ret; +} + +static void __exit nfnl_acct_exit(void) +{ + struct nf_acct *cur, *tmp; + + pr_info("nfnl_acct: unregistering from nfnetlink.\n"); + nfnetlink_subsys_unregister(&nfnl_acct_subsys); + + list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) { + list_del_rcu(&cur->head); + /* We are sure that our objects have no clients at this point, + * it's safe to release them all without checking refcnt. */ + kfree_rcu(cur, rcu_head); + } +} + +module_init(nfnl_acct_init); +module_exit(nfnl_acct_exit); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 2d8158acf6fa..66b2c54c544f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -307,17 +307,14 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) n = max(inst_size, pkt_size); skb = alloc_skb(n, GFP_ATOMIC); if (!skb) { - pr_notice("nfnetlink_log: can't alloc whole buffer (%u bytes)\n", - inst_size); - if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ skb = alloc_skb(pkt_size, GFP_ATOMIC); if (!skb) - pr_err("nfnetlink_log: can't even alloc %u " - "bytes\n", pkt_size); + pr_err("nfnetlink_log: can't even alloc %u bytes\n", + pkt_size); } } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index b0869fe3633b..8d987c3573fd 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -14,6 +14,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> +#include <linux/module.h> #include <linux/socket.h> #include <linux/net.h> #include <linux/proc_fs.h> @@ -776,12 +777,11 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) - i->jumpstack = vmalloc(size); + i->jumpstack = vzalloc(size); else - i->jumpstack = kmalloc(size, GFP_KERNEL); + i->jumpstack = kzalloc(size, GFP_KERNEL); if (i->jumpstack == NULL) return -ENOMEM; - memset(i->jumpstack, 0, size); i->stacksize *= xt_jumpstack_multiplier; size = sizeof(void *) * i->stacksize; diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 4bca15a0c385..ba92824086f3 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -98,6 +98,7 @@ static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) struct ipv6hdr _ip6h; const struct ipv6hdr *ih; u8 nexthdr; + __be16 frag_off; int offset; ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); @@ -108,7 +109,7 @@ static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) nexthdr = ih->nexthdr; offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), - &nexthdr); + &nexthdr, &frag_off); audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", &ih->saddr, &ih->daddr, nexthdr); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 0221d10de75a..8e87123f1373 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) int ret = 0; u8 proto; - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; + if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER)) + return -EOPNOTSUPP; if (info->flags & XT_CT_NOTRACK) { ct = nf_ct_untracked_get(); @@ -92,7 +92,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) GFP_KERNEL)) goto err3; - if (info->helper[0]) { + if (info->flags & XT_CT_USERSPACE_HELPER) { + __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status); + } else if (info->helper[0]) { ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) { diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 3bdd443aaf15..f407ebc13481 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -122,14 +122,12 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL); if (!info->timer) { - pr_debug("couldn't alloc timer\n"); ret = -ENOMEM; goto out; } info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { - pr_debug("couldn't alloc attribute name\n"); ret = -ENOMEM; goto out_free_timer; } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index d4f4b5d66b20..95237c89607a 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -49,7 +49,7 @@ static u32 hash_v4(const struct sk_buff *skb) return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval); } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static u32 hash_v6(const struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -74,7 +74,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) if (par->family == NFPROTO_IPV4) queue = (((u64) hash_v4(skb) * info->queues_total) >> 32) + queue; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (par->family == NFPROTO_IPV6) queue = (((u64) hash_v6(skb) * info->queues_total) >> 32) + queue; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 9e63b43faeed..190ad37c5cf8 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -161,7 +161,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, struct flowi6 *fl6 = &fl.u.ip6; memset(fl6, 0, sizeof(*fl6)); - ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr); + fl6->daddr = ipv6_hdr(skb)->saddr; } rcu_read_lock(); ai = nf_get_afinfo(family); @@ -198,17 +198,18 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static unsigned int tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); u8 nexthdr; + __be16 frag_off; int tcphoff; int ret; nexthdr = ipv6h->nexthdr; - tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); + tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off); if (tcphoff < 0) return NF_DROP; ret = tcpmss_mangle_packet(skb, par->targinfo, @@ -259,7 +260,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) return -EINVAL; } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static int tcpmss_tg6_check(const struct xt_tgchk_param *par) { const struct xt_tcpmss_info *info = par->targinfo; @@ -292,7 +293,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { .proto = IPPROTO_TCP, .me = THIS_MODULE, }, -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .family = NFPROTO_IPV6, .name = "TCPMSS", diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 9dc9ecfdd546..25fd1c4e1eec 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -80,16 +80,17 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par) sizeof(struct iphdr) + sizeof(struct tcphdr)); } -#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_MANGLE) static unsigned int tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); int tcphoff; u_int8_t nexthdr; + __be16 frag_off; nexthdr = ipv6h->nexthdr; - tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); + tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off); if (tcphoff < 0) return NF_DROP; @@ -108,7 +109,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_tcpoptstrip_target_info), .me = THIS_MODULE, }, -#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_MANGLE) { .name = "TCPOPTSTRIP", .family = NFPROTO_IPV6, diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 5f054a0dbbb1..3aae66facf9f 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -25,13 +25,10 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_TEE.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) # define WITH_CONNTRACK 1 # include <net/netfilter/nf_conntrack.h> #endif -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -# define WITH_IPV6 1 -#endif struct xt_tee_priv { struct notifier_block notifier; @@ -136,7 +133,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -#ifdef WITH_IPV6 +#if IS_ENABLED(CONFIG_IPV6) static bool tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) { @@ -196,7 +193,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) } return XT_CONTINUE; } -#endif /* WITH_IPV6 */ +#endif static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -276,7 +273,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { .destroy = tee_tg_destroy, .me = THIS_MODULE, }, -#ifdef WITH_IPV6 +#if IS_ENABLED(CONFIG_IPV6) { .name = "TEE", .revision = 1, diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index dcfd57eb9d02..35a959a096e0 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -22,7 +22,7 @@ #include <net/netfilter/ipv4/nf_defrag_ipv4.h> -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #define XT_TPROXY_HAVE_IPV6 1 #include <net/if_inet6.h> #include <net/addrconf.h> diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index b77d383cec78..49c5ff7f6dd6 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -16,7 +16,7 @@ #include <linux/ip.h> #include <net/route.h> -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/ip6_fib.h> @@ -31,7 +31,7 @@ MODULE_DESCRIPTION("Xtables: address type match"); MODULE_ALIAS("ipt_addrtype"); MODULE_ALIAS("ip6t_addrtype"); -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, const struct in6_addr *addr) { @@ -42,7 +42,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, int route_err; memset(&flow, 0, sizeof(flow)); - ipv6_addr_copy(&flow.daddr, addr); + flow.daddr = *addr; if (dev) flow.flowi6_oif = dev->ifindex; @@ -149,7 +149,7 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) dev = par->out; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) return addrtype_mt6(net, dev, skb, info); #endif @@ -190,7 +190,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) return -EINVAL; } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) { if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { pr_err("ipv6 BLACKHOLE matching not supported\n"); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 5b138506690e..e595e07a759b 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -40,46 +40,46 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) case XT_CONNBYTES_PKTS: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - what = counters[IP_CT_DIR_ORIGINAL].packets; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: - what = counters[IP_CT_DIR_REPLY].packets; + what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: - what = counters[IP_CT_DIR_ORIGINAL].packets; - what += counters[IP_CT_DIR_REPLY].packets; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); + what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } break; case XT_CONNBYTES_BYTES: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - what = counters[IP_CT_DIR_ORIGINAL].bytes; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); break; case XT_CONNBYTES_DIR_REPLY: - what = counters[IP_CT_DIR_REPLY].bytes; + what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; case XT_CONNBYTES_DIR_BOTH: - what = counters[IP_CT_DIR_ORIGINAL].bytes; - what += counters[IP_CT_DIR_REPLY].bytes; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); + what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; } break; case XT_CONNBYTES_AVGPKT: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - bytes = counters[IP_CT_DIR_ORIGINAL].bytes; - pkts = counters[IP_CT_DIR_ORIGINAL].packets; + bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); + pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: - bytes = counters[IP_CT_DIR_REPLY].bytes; - pkts = counters[IP_CT_DIR_REPLY].packets; + bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); + pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: - bytes = counters[IP_CT_DIR_ORIGINAL].bytes + - counters[IP_CT_DIR_REPLY].bytes; - pkts = counters[IP_CT_DIR_ORIGINAL].packets + - counters[IP_CT_DIR_REPLY].packets; + bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) + + atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); + pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) + + atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } if (pkts != 0) @@ -87,10 +87,10 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) break; } - if (sinfo->count.to) + if (sinfo->count.to >= sinfo->count.from) return what <= sinfo->count.to && what >= sinfo->count.from; - else - return what >= sinfo->count.from; + else /* inverted */ + return what < sinfo->count.to || what > sinfo->count.from; } static int connbytes_mt_check(const struct xt_mtchk_param *par) diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c new file mode 100644 index 000000000000..3c831a8efebc --- /dev/null +++ b/net/netfilter/xt_ecn.c @@ -0,0 +1,179 @@ +/* + * Xtables module for matching the value of the IPv4/IPv6 and TCP ECN bits + * + * (C) 2002 by Harald Welte <laforge@gnumonks.org> + * (C) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/tcp.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_ecn.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> + +MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); +MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_ecn"); +MODULE_ALIAS("ip6t_ecn"); + +static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_ecn_info *einfo = par->matchinfo; + struct tcphdr _tcph; + const struct tcphdr *th; + + /* In practice, TCP match does this, so can't fail. But let's + * be good citizens. + */ + th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return false; + + if (einfo->operation & XT_ECN_OP_MATCH_ECE) { + if (einfo->invert & XT_ECN_OP_MATCH_ECE) { + if (th->ece == 1) + return false; + } else { + if (th->ece == 0) + return false; + } + } + + if (einfo->operation & XT_ECN_OP_MATCH_CWR) { + if (einfo->invert & XT_ECN_OP_MATCH_CWR) { + if (th->cwr == 1) + return false; + } else { + if (th->cwr == 0) + return false; + } + } + + return true; +} + +static inline bool match_ip(const struct sk_buff *skb, + const struct xt_ecn_info *einfo) +{ + return ((ip_hdr(skb)->tos & XT_ECN_IP_MASK) == einfo->ip_ect) ^ + !!(einfo->invert & XT_ECN_OP_MATCH_IP); +} + +static bool ecn_mt4(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_ecn_info *info = par->matchinfo; + + if (info->operation & XT_ECN_OP_MATCH_IP && !match_ip(skb, info)) + return false; + + if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && + !match_tcp(skb, par)) + return false; + + return true; +} + +static int ecn_mt_check4(const struct xt_mtchk_param *par) +{ + const struct xt_ecn_info *info = par->matchinfo; + const struct ipt_ip *ip = par->entryinfo; + + if (info->operation & XT_ECN_OP_MATCH_MASK) + return -EINVAL; + + if (info->invert & XT_ECN_OP_MATCH_MASK) + return -EINVAL; + + if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && + (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { + pr_info("cannot match TCP bits in rule for non-tcp packets\n"); + return -EINVAL; + } + + return 0; +} + +static inline bool match_ipv6(const struct sk_buff *skb, + const struct xt_ecn_info *einfo) +{ + return (((ipv6_hdr(skb)->flow_lbl[0] >> 4) & XT_ECN_IP_MASK) == + einfo->ip_ect) ^ + !!(einfo->invert & XT_ECN_OP_MATCH_IP); +} + +static bool ecn_mt6(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_ecn_info *info = par->matchinfo; + + if (info->operation & XT_ECN_OP_MATCH_IP && !match_ipv6(skb, info)) + return false; + + if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && + !match_tcp(skb, par)) + return false; + + return true; +} + +static int ecn_mt_check6(const struct xt_mtchk_param *par) +{ + const struct xt_ecn_info *info = par->matchinfo; + const struct ip6t_ip6 *ip = par->entryinfo; + + if (info->operation & XT_ECN_OP_MATCH_MASK) + return -EINVAL; + + if (info->invert & XT_ECN_OP_MATCH_MASK) + return -EINVAL; + + if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && + (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) { + pr_info("cannot match TCP bits in rule for non-tcp packets\n"); + return -EINVAL; + } + + return 0; +} + +static struct xt_match ecn_mt_reg[] __read_mostly = { + { + .name = "ecn", + .family = NFPROTO_IPV4, + .match = ecn_mt4, + .matchsize = sizeof(struct xt_ecn_info), + .checkentry = ecn_mt_check4, + .me = THIS_MODULE, + }, + { + .name = "ecn", + .family = NFPROTO_IPV6, + .match = ecn_mt6, + .matchsize = sizeof(struct xt_ecn_info), + .checkentry = ecn_mt_check6, + .me = THIS_MODULE, + }, +}; + +static int __init ecn_mt_init(void) +{ + return xt_register_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg)); +} + +static void __exit ecn_mt_exit(void) +{ + xt_unregister_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg)); +} + +module_init(ecn_mt_init); +module_exit(ecn_mt_exit); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9228ee0dc11a..8e4992101875 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -21,7 +21,7 @@ #include <linux/mm.h> #include <linux/in.h> #include <linux/ip.h> -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #include <linux/ipv6.h> #include <net/ipv6.h> #endif @@ -64,7 +64,7 @@ struct dsthash_dst { __be32 src; __be32 dst; } ip; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) struct { __be32 src[4]; __be32 dst[4]; @@ -176,10 +176,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, ent = NULL; } else ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); - if (!ent) { - if (net_ratelimit()) - pr_err("cannot allocate dsthash_ent\n"); - } else { + if (ent) { memcpy(&ent->dst, dst, sizeof(ent->dst)); spin_lock_init(&ent->lock); @@ -416,7 +413,7 @@ static inline __be32 maskl(__be32 a, unsigned int l) return l ? htonl(ntohl(a) & ~0 << (32 - l)) : 0; } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static void hashlimit_ipv6_mask(__be32 *i, unsigned int p) { switch (p) { @@ -448,6 +445,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, { __be16 _ports[2], *ports; u8 nexthdr; + __be16 frag_off; int poff; memset(dst, 0, sizeof(*dst)); @@ -466,7 +464,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; nexthdr = ip_hdr(skb)->protocol; break; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) { memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr, @@ -483,7 +481,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) return 0; nexthdr = ipv6_hdr(skb)->nexthdr; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr); + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if ((int)protoff < 0) return -1; break; @@ -618,7 +616,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, }, -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "hashlimit", .revision = 1, @@ -695,7 +693,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); break; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, @@ -763,7 +761,7 @@ static int __net_init hashlimit_proc_net_init(struct net *net) hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net); if (!hashlimit_net->ipt_hashlimit) return -ENOMEM; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net); if (!hashlimit_net->ip6t_hashlimit) { proc_net_remove(net, "ipt_hashlimit"); @@ -776,7 +774,7 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { proc_net_remove(net, "ipt_hashlimit"); -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) proc_net_remove(net, "ip6t_hashlimit"); #endif } diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c new file mode 100644 index 000000000000..b3be0ef21f19 --- /dev/null +++ b/net/netfilter/xt_nfacct.c @@ -0,0 +1,76 @@ +/* + * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 Intra2net AG <http://www.intra2net.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 (or any + * later at your option) as published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/skbuff.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/nfnetlink_acct.h> +#include <linux/netfilter/xt_nfacct.h> + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_nfacct"); +MODULE_ALIAS("ip6t_nfacct"); + +static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_nfacct_match_info *info = par->targinfo; + + nfnl_acct_update(skb, info->nfacct); + + return true; +} + +static int +nfacct_mt_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_nfacct_match_info *info = par->matchinfo; + struct nf_acct *nfacct; + + nfacct = nfnl_acct_find_get(info->name); + if (nfacct == NULL) { + pr_info("xt_nfacct: accounting object with name `%s' " + "does not exists\n", info->name); + return -ENOENT; + } + info->nfacct = nfacct; + return 0; +} + +static void +nfacct_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_nfacct_match_info *info = par->matchinfo; + + nfnl_acct_put(info->nfacct); +} + +static struct xt_match nfacct_mt_reg __read_mostly = { + .name = "nfacct", + .family = NFPROTO_UNSPEC, + .checkentry = nfacct_mt_checkentry, + .match = nfacct_mt, + .destroy = nfacct_mt_destroy, + .matchsize = sizeof(struct xt_nfacct_match_info), + .me = THIS_MODULE, +}; + +static int __init nfacct_mt_init(void) +{ + return xt_register_match(&nfacct_mt_reg); +} + +static void __exit nfacct_mt_exit(void) +{ + xt_unregister_match(&nfacct_mt_reg); +} + +module_init(nfacct_mt_init); +module_exit(nfacct_mt_exit); diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 70eb2b4984dd..44c8eb4c9d66 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -9,6 +9,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_quota.h> +#include <linux/module.h> struct xt_quota_priv { spinlock_t lock; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index fe39f7e913df..72bb07f57f97 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,7 +22,7 @@ #include <net/netfilter/nf_tproxy_core.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #define XT_SOCKET_HAVE_IPV6 1 #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> @@ -30,7 +30,7 @@ #include <linux/netfilter/xt_socket.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #define XT_SOCKET_HAVE_CONNTRACK 1 #include <net/netfilter/nf_conntrack.h> #endif @@ -214,6 +214,7 @@ extract_icmp6_fields(const struct sk_buff *skb, struct icmp6hdr *icmph, _icmph; __be16 *ports, _ports[2]; u8 inside_nexthdr; + __be16 inside_fragoff; int inside_hdrlen; icmph = skb_header_pointer(skb, outside_hdrlen, @@ -229,7 +230,8 @@ extract_icmp6_fields(const struct sk_buff *skb, return 1; inside_nexthdr = inside_iph->nexthdr; - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), &inside_nexthdr); + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), + &inside_nexthdr, &inside_fragoff); if (inside_hdrlen < 0) return 1; /* hjm: Packet has no/incomplete transport layer headers. */ diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 42ecb71d445f..4fe4fb4276d0 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -16,6 +16,7 @@ #include <linux/netfilter/xt_statistic.h> #include <linux/netfilter/x_tables.h> +#include <linux/module.h> struct xt_statistic_priv { atomic_t count; diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 96b749dacc34..6f1701322fb6 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -96,7 +96,7 @@ struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /** * netlbl_af6list_search - Search for a matching IPv6 address entry * @addr: IPv6 address @@ -185,7 +185,7 @@ int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head) return 0; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /** * netlbl_af6list_add - Add a new IPv6 address entry to a list * @entry: address entry @@ -263,7 +263,7 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, return entry; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /** * netlbl_af6list_remove_entry - Remove an IPv6 address entry * @entry: address entry @@ -342,7 +342,7 @@ void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, } } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /** * netlbl_af6list_audit_addr - Audit an IPv6 address * @audit_buf: audit buffer diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index fdbc1d2c7352..a1287ce18130 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -133,7 +133,7 @@ static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, } #endif -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list) diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 7d8083cde34f..d8d424337550 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -78,7 +78,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) struct netlbl_dom_map *ptr; struct netlbl_af4list *iter4; struct netlbl_af4list *tmp4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; struct netlbl_af6list *tmp6; #endif /* IPv6 */ @@ -90,7 +90,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) netlbl_af4list_remove_entry(iter4); kfree(netlbl_domhsh_addr4_entry(iter4)); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, &ptr->type_def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); @@ -217,7 +217,7 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, cipsov4 = map4->type_def.cipsov4; netlbl_af4list_audit_addr(audit_buf, 0, NULL, addr4->addr, addr4->mask); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) } else if (addr6 != NULL) { struct netlbl_domaddr6_map *map6; map6 = netlbl_domhsh_addr6_entry(addr6); @@ -306,7 +306,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_dom_map *entry_old; struct netlbl_af4list *iter4; struct netlbl_af4list *tmp4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; struct netlbl_af6list *tmp6; #endif /* IPv6 */ @@ -338,7 +338,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, &entry->type_def.addrsel->list4) netlbl_domhsh_audit_add(entry, iter4, NULL, ret_val, audit_info); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, &entry->type_def.addrsel->list6) netlbl_domhsh_audit_add(entry, NULL, iter6, @@ -365,7 +365,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, ret_val = -EEXIST; goto add_return; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, &entry->type_def.addrsel->list6) if (netlbl_af6list_search_exact(&iter6->addr, @@ -386,7 +386,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, if (ret_val != 0) goto add_return; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, &entry->type_def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); @@ -451,7 +451,7 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, if (entry != rcu_dereference(netlbl_domhsh_def)) list_del_rcu(&entry->list); else - rcu_assign_pointer(netlbl_domhsh_def, NULL); + RCU_INIT_POINTER(netlbl_domhsh_def, NULL); } else ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_lock); @@ -510,7 +510,7 @@ int netlbl_domhsh_remove_af4(const char *domain, struct netlbl_dom_map *entry_map; struct netlbl_af4list *entry_addr; struct netlbl_af4list *iter4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; #endif /* IPv6 */ struct netlbl_domaddr4_map *entry; @@ -533,7 +533,7 @@ int netlbl_domhsh_remove_af4(const char *domain, goto remove_af4_failure; netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4) goto remove_af4_single_addr; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6) goto remove_af4_single_addr; #endif /* IPv6 */ @@ -644,7 +644,7 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, return netlbl_domhsh_addr4_entry(addr_iter); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /** * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table * @domain: the domain name to search for diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index bfcc0f7024c5..90872c4ca30f 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -104,7 +104,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt, int (*callback) (struct netlbl_dom_map *entry, void *arg), void *cb_arg); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, const struct in6_addr *addr); #endif /* IPv6 */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 9c24de10a657..2560e7b441c6 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -111,8 +111,6 @@ int netlbl_cfg_unlbl_map_add(const char *domain, struct netlbl_domaddr_map *addrmap = NULL; struct netlbl_domaddr4_map *map4 = NULL; struct netlbl_domaddr6_map *map6 = NULL; - const struct in_addr *addr4, *mask4; - const struct in6_addr *addr6, *mask6; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) @@ -133,9 +131,9 @@ int netlbl_cfg_unlbl_map_add(const char *domain, INIT_LIST_HEAD(&addrmap->list6); switch (family) { - case AF_INET: - addr4 = addr; - mask4 = mask; + case AF_INET: { + const struct in_addr *addr4 = addr; + const struct in_addr *mask4 = mask; map4 = kzalloc(sizeof(*map4), GFP_ATOMIC); if (map4 == NULL) goto cfg_unlbl_map_add_failure; @@ -148,25 +146,29 @@ int netlbl_cfg_unlbl_map_add(const char *domain, if (ret_val != 0) goto cfg_unlbl_map_add_failure; break; - case AF_INET6: - addr6 = addr; - mask6 = mask; + } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: { + const struct in6_addr *addr6 = addr; + const struct in6_addr *mask6 = mask; map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); if (map6 == NULL) goto cfg_unlbl_map_add_failure; map6->type = NETLBL_NLTYPE_UNLABELED; - ipv6_addr_copy(&map6->list.addr, addr6); + map6->list.addr = *addr6; map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0]; map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1]; map6->list.addr.s6_addr32[2] &= mask6->s6_addr32[2]; map6->list.addr.s6_addr32[3] &= mask6->s6_addr32[3]; - ipv6_addr_copy(&map6->list.mask, mask6); + map6->list.mask = *mask6; map6->list.valid = 1; - ret_val = netlbl_af4list_add(&map4->list, - &addrmap->list4); + ret_val = netlbl_af6list_add(&map6->list, + &addrmap->list6); if (ret_val != 0) goto cfg_unlbl_map_add_failure; break; + } +#endif /* IPv6 */ default: goto cfg_unlbl_map_add_failure; break; @@ -225,9 +227,11 @@ int netlbl_cfg_unlbl_static_add(struct net *net, case AF_INET: addr_len = sizeof(struct in_addr); break; +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: addr_len = sizeof(struct in6_addr); break; +#endif /* IPv6 */ default: return -EPFNOSUPPORT; } @@ -266,9 +270,11 @@ int netlbl_cfg_unlbl_static_del(struct net *net, case AF_INET: addr_len = sizeof(struct in_addr); break; +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: addr_len = sizeof(struct in6_addr); break; +#endif /* IPv6 */ default: return -EPFNOSUPPORT; } @@ -667,7 +673,7 @@ int netlbl_sock_setattr(struct sock *sk, ret_val = -ENOENT; } break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: /* since we don't support any IPv6 labeling protocols right * now we can optimize everything away until we do */ @@ -718,7 +724,7 @@ int netlbl_sock_getattr(struct sock *sk, case AF_INET: ret_val = cipso_v4_sock_getattr(sk, secattr); break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: ret_val = -ENOMSG; break; @@ -776,7 +782,7 @@ int netlbl_conn_setattr(struct sock *sk, ret_val = -ENOENT; } break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: /* since we don't support any IPv6 labeling protocols right * now we can optimize everything away until we do */ @@ -847,7 +853,7 @@ int netlbl_req_setattr(struct request_sock *req, ret_val = -ENOENT; } break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: /* since we don't support any IPv6 labeling protocols right * now we can optimize everything away until we do */ @@ -920,7 +926,7 @@ int netlbl_skbuff_setattr(struct sk_buff *skb, ret_val = -ENOENT; } break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: /* since we don't support any IPv6 labeling protocols right * now we can optimize everything away until we do */ @@ -959,7 +965,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, cipso_v4_skbuff_getattr(skb, secattr) == 0) return 0; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: break; #endif /* IPv6 */ diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index bfa555869775..4809e2e48b02 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -184,7 +184,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, entry->type = NETLBL_NLTYPE_ADDRSELECT; entry->type_def.addrsel = addrmap; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { struct in6_addr *addr; struct in6_addr *mask; @@ -216,12 +216,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = -ENOMEM; goto add_failure; } - ipv6_addr_copy(&map->list.addr, addr); + map->list.addr = *addr; map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; map->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; map->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; - ipv6_addr_copy(&map->list.mask, mask); + map->list.mask = *mask; map->list.valid = 1; map->type = entry->type; @@ -270,7 +270,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, struct nlattr *nla_a; struct nlattr *nla_b; struct netlbl_af4list *iter4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; #endif @@ -324,7 +324,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, nla_nest_end(skb, nla_b); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, &entry->type_def.addrsel->list6) { struct netlbl_domaddr6_map *map6; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e6e823656f9d..e7ff694f1049 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -170,7 +170,7 @@ static void netlbl_unlhsh_free_iface(struct rcu_head *entry) struct netlbl_unlhsh_iface *iface; struct netlbl_af4list *iter4; struct netlbl_af4list *tmp4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; struct netlbl_af6list *tmp6; #endif /* IPv6 */ @@ -184,7 +184,7 @@ static void netlbl_unlhsh_free_iface(struct rcu_head *entry) netlbl_af4list_remove_entry(iter4); kfree(netlbl_unlhsh_addr4_entry(iter4)); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) { netlbl_af6list_remove_entry(iter6); kfree(netlbl_unlhsh_addr6_entry(iter6)); @@ -274,7 +274,7 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, return ret_val; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /** * netlbl_unlhsh_add_addr6 - Add a new IPv6 address entry to the hash table * @iface: the associated interface entry @@ -300,12 +300,12 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, if (entry == NULL) return -ENOMEM; - ipv6_addr_copy(&entry->list.addr, addr); + entry->list.addr = *addr; entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; - ipv6_addr_copy(&entry->list.mask, mask); + entry->list.mask = *mask; entry->list.valid = 1; entry->secid = secid; @@ -436,7 +436,7 @@ int netlbl_unlhsh_add(struct net *net, mask4->s_addr); break; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case sizeof(struct in6_addr): { const struct in6_addr *addr6 = addr; const struct in6_addr *mask6 = mask; @@ -531,7 +531,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, return 0; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /** * netlbl_unlhsh_remove_addr6 - Remove an IPv6 address entry * @net: network namespace @@ -606,14 +606,14 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) { struct netlbl_af4list *iter4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; #endif /* IPv6 */ spin_lock(&netlbl_unlhsh_lock); netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list) goto unlhsh_condremove_failure; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list) goto unlhsh_condremove_failure; #endif /* IPv6 */ @@ -621,7 +621,7 @@ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) if (iface->ifindex > 0) list_del_rcu(&iface->list); else - rcu_assign_pointer(netlbl_unlhsh_def, NULL); + RCU_INIT_POINTER(netlbl_unlhsh_def, NULL); spin_unlock(&netlbl_unlhsh_lock); call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); @@ -680,7 +680,7 @@ int netlbl_unlhsh_remove(struct net *net, iface, addr, mask, audit_info); break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case sizeof(struct in6_addr): ret_val = netlbl_unlhsh_remove_addr6(net, iface, addr, mask, @@ -1196,7 +1196,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *addr6; #endif @@ -1228,7 +1228,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, goto unlabel_staticlist_return; } } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { if (iter_addr6++ < skip_addr6) @@ -1277,7 +1277,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, u32 skip_addr6 = cb->args[1]; u32 iter_addr4 = 0; struct netlbl_af4list *addr4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) u32 iter_addr6 = 0; struct netlbl_af6list *addr6; #endif @@ -1303,7 +1303,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, goto unlabel_staticlistdef_return; } } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { if (iter_addr6++ < skip_addr6) continue; @@ -1447,11 +1447,9 @@ int __init netlbl_unlabel_init(u32 size) for (iter = 0; iter < hsh_tbl->size; iter++) INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); - rcu_read_lock(); spin_lock(&netlbl_unlhsh_lock); rcu_assign_pointer(netlbl_unlhsh, hsh_tbl); spin_unlock(&netlbl_unlhsh_lock); - rcu_read_unlock(); register_netdevice_notifier(&netlbl_unlhsh_netdev_notifier); @@ -1494,7 +1492,7 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb, secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid; break; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case PF_INET6: { struct ipv6hdr *hdr6; struct netlbl_af6list *addr6; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a4db0211da0..629b06182f3f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -139,12 +139,12 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); static ATOMIC_NOTIFIER_HEAD(netlink_chain); -static u32 netlink_group_mask(u32 group) +static inline u32 netlink_group_mask(u32 group) { return group ? 1 << (group - 1) : 0; } -static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) +static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) { return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; } @@ -226,8 +226,7 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static inline struct sock *netlink_lookup(struct net *net, int protocol, - u32 pid) +static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) { struct nl_pid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; @@ -248,7 +247,7 @@ found: return sk; } -static inline struct hlist_head *nl_pid_hash_zalloc(size_t size) +static struct hlist_head *nl_pid_hash_zalloc(size_t size) { if (size <= PAGE_SIZE) return kzalloc(size, GFP_ATOMIC); @@ -258,7 +257,7 @@ static inline struct hlist_head *nl_pid_hash_zalloc(size_t size) get_order(size)); } -static inline void nl_pid_hash_free(struct hlist_head *table, size_t size) +static void nl_pid_hash_free(struct hlist_head *table, size_t size) { if (size <= PAGE_SIZE) kfree(table); @@ -578,7 +577,7 @@ retry: return err; } -static inline int netlink_capable(struct socket *sock, unsigned int flag) +static inline int netlink_capable(const struct socket *sock, unsigned int flag) { return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) || capable(CAP_NET_ADMIN); @@ -846,8 +845,7 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb) sock_put(sk); } -static inline struct sk_buff *netlink_trim(struct sk_buff *skb, - gfp_t allocation) +static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) { int delta; @@ -871,7 +869,7 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb, return skb; } -static inline void netlink_rcv_wake(struct sock *sk) +static void netlink_rcv_wake(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -881,7 +879,7 @@ static inline void netlink_rcv_wake(struct sock *sk) wake_up_interruptible(&nlk->wait); } -static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) +static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb) { int ret; struct netlink_sock *nlk = nlk_sk(sk); @@ -952,8 +950,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group) } EXPORT_SYMBOL_GPL(netlink_has_listeners); -static inline int netlink_broadcast_deliver(struct sock *sk, - struct sk_buff *skb) +static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); @@ -962,7 +959,7 @@ static inline int netlink_broadcast_deliver(struct sock *sk, skb_set_owner_r(skb, sk); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); - return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf; + return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); } return -1; } @@ -982,7 +979,7 @@ struct netlink_broadcast_data { void *tx_data; }; -static inline int do_one_broadcast(struct sock *sk, +static int do_one_broadcast(struct sock *sk, struct netlink_broadcast_data *p) { struct netlink_sock *nlk = nlk_sk(sk); @@ -1110,8 +1107,7 @@ struct netlink_set_err_data { int code; }; -static inline int do_one_set_err(struct sock *sk, - struct netlink_set_err_data *p) +static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) { struct netlink_sock *nlk = nlk_sk(sk); int ret = 0; @@ -1324,10 +1320,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) { + if (NULL == siocb->scm) siocb->scm = &scm; - memset(&scm, 0, sizeof(scm)); - } + err = scm_send(sock, msg, siocb->scm); if (err < 0) return err; @@ -1578,7 +1573,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups) new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); if (!new) return -ENOMEM; - old = rcu_dereference_raw(tbl->listeners); + old = rcu_dereference_protected(tbl->listeners, 1); memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); rcu_assign_pointer(tbl->listeners, new); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 05fedbf489a5..c29d2568c9e0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -33,6 +33,14 @@ void genl_unlock(void) } EXPORT_SYMBOL(genl_unlock); +#ifdef CONFIG_PROVE_LOCKING +int lockdep_genl_is_held(void) +{ + return lockdep_is_held(&genl_mutex); +} +EXPORT_SYMBOL(lockdep_genl_is_held); +#endif + #define GENL_FAM_TAB_SIZE 16 #define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) @@ -98,7 +106,7 @@ static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) /* Of course we are going to have problems once we hit * 2^16 alive types, but that can only happen by year 2K */ -static inline u16 genl_generate_id(void) +static u16 genl_generate_id(void) { static u16 id_gen_idx = GENL_MIN_ID; int i; @@ -784,6 +792,15 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]); res = genl_family_find_byname(name); +#ifdef CONFIG_MODULES + if (res == NULL) { + genl_unlock(); + request_module("net-pf-%d-proto-%d-type-%s", + PF_NETLINK, NETLINK_GENERIC, name); + genl_lock(); + res = genl_family_find_byname(name); + } +#endif err = -ENOENT; } @@ -946,3 +963,16 @@ int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group, return genlmsg_mcast(skb, pid, group, flags); } EXPORT_SYMBOL(genlmsg_multicast_allns); + +void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags) +{ + struct sock *sk = net->genl_sock; + int report = 0; + + if (nlh) + report = nlmsg_report(nlh); + + nlmsg_notify(sk, skb, pid, group, report, flags); +} +EXPORT_SYMBOL(genl_notify); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 732152f718e0..7dab229bfbcc 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -306,26 +306,26 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); - int opt; + unsigned long opt; if (level != SOL_NETROM) return -ENOPROTOOPT; - if (optlen < sizeof(int)) + if (optlen < sizeof(unsigned int)) return -EINVAL; - if (get_user(opt, (int __user *)optval)) + if (get_user(opt, (unsigned int __user *)optval)) return -EFAULT; switch (optname) { case NETROM_T1: - if (opt < 1) + if (opt < 1 || opt > ULONG_MAX / HZ) return -EINVAL; nr->t1 = opt * HZ; return 0; case NETROM_T2: - if (opt < 1) + if (opt < 1 || opt > ULONG_MAX / HZ) return -EINVAL; nr->t2 = opt * HZ; return 0; @@ -337,13 +337,13 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, return 0; case NETROM_T4: - if (opt < 1) + if (opt < 1 || opt > ULONG_MAX / HZ) return -EINVAL; nr->t4 = opt * HZ; return 0; case NETROM_IDLE: - if (opt < 0) + if (opt > ULONG_MAX / (60 * HZ)) return -EINVAL; nr->idle = opt * 60 * HZ; return 0; @@ -1244,7 +1244,8 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCADDRT: case SIOCDELRT: case SIOCNRDECOBS: - if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; return nr_rt_ioctl(cmd, argp); default: diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index cd5ddb2ebc43..2cf330162d7e 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -37,6 +37,7 @@ #include <linux/spinlock.h> #include <net/netrom.h> #include <linux/seq_file.h> +#include <linux/export.h> static unsigned int nr_neigh_no = 1; @@ -669,14 +670,17 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg) case SIOCADDRT: if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct))) return -EFAULT; - if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL) + if (nr_route.ndigis > AX25_MAX_DIGIS) return -EINVAL; - if (nr_route.ndigis < 0 || nr_route.ndigis > AX25_MAX_DIGIS) { - dev_put(dev); + if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL) return -EINVAL; - } switch (nr_route.type) { case NETROM_NODE: + if (strnlen(nr_route.mnemonic, 7) == 7) { + ret = -EINVAL; + break; + } + ret = nr_add_node(&nr_route.callsign, nr_route.mnemonic, &nr_route.neighbour, diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig index 33e095b124b3..44c865b86d6f 100644 --- a/net/nfc/Kconfig +++ b/net/nfc/Kconfig @@ -13,4 +13,7 @@ menuconfig NFC To compile this support as a module, choose M here: the module will be called nfc. +source "net/nfc/nci/Kconfig" +source "net/nfc/llcp/Kconfig" + source "drivers/nfc/Kconfig" diff --git a/net/nfc/Makefile b/net/nfc/Makefile index 16250c353851..7b4a6dcfa566 100644 --- a/net/nfc/Makefile +++ b/net/nfc/Makefile @@ -3,5 +3,7 @@ # obj-$(CONFIG_NFC) += nfc.o +obj-$(CONFIG_NFC_NCI) += nci/ nfc-objs := core.o netlink.o af_nfc.o rawsock.o +nfc-$(CONFIG_NFC_LLCP) += llcp/llcp.o llcp/commands.o llcp/sock.o diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index e982cef8f49d..da67756425ce 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -22,6 +22,7 @@ */ #include <linux/nfc.h> +#include <linux/module.h> #include "nfc.h" diff --git a/net/nfc/core.c b/net/nfc/core.c index b6fd4e1f2057..3ddf6e698df0 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -21,10 +21,13 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ + #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/nfc.h> #include "nfc.h" @@ -33,24 +36,79 @@ int nfc_devlist_generation; DEFINE_MUTEX(nfc_devlist_mutex); -int nfc_printk(const char *level, const char *format, ...) +/** + * nfc_dev_up - turn on the NFC device + * + * @dev: The nfc device to be turned on + * + * The device remains up until the nfc_dev_down function is called. + */ +int nfc_dev_up(struct nfc_dev *dev) { - struct va_format vaf; - va_list args; - int r; + int rc = 0; - va_start(args, format); + pr_debug("dev_name=%s\n", dev_name(&dev->dev)); - vaf.fmt = format; - vaf.va = &args; + device_lock(&dev->dev); - r = printk("%sNFC: %pV\n", level, &vaf); + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (dev->dev_up) { + rc = -EALREADY; + goto error; + } + + if (dev->ops->dev_up) + rc = dev->ops->dev_up(dev); - va_end(args); + if (!rc) + dev->dev_up = true; - return r; +error: + device_unlock(&dev->dev); + return rc; +} + +/** + * nfc_dev_down - turn off the NFC device + * + * @dev: The nfc device to be turned off + */ +int nfc_dev_down(struct nfc_dev *dev) +{ + int rc = 0; + + pr_debug("dev_name=%s\n", dev_name(&dev->dev)); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (!dev->dev_up) { + rc = -EALREADY; + goto error; + } + + if (dev->polling || dev->remote_activated) { + rc = -EBUSY; + goto error; + } + + if (dev->ops->dev_down) + dev->ops->dev_down(dev); + + dev->dev_up = false; + +error: + device_unlock(&dev->dev); + return rc; } -EXPORT_SYMBOL(nfc_printk); /** * nfc_start_poll - start polling for nfc targets @@ -65,7 +123,8 @@ int nfc_start_poll(struct nfc_dev *dev, u32 protocols) { int rc; - nfc_dbg("dev_name=%s protocols=0x%x", dev_name(&dev->dev), protocols); + pr_debug("dev_name=%s protocols=0x%x\n", + dev_name(&dev->dev), protocols); if (!protocols) return -EINVAL; @@ -100,7 +159,7 @@ int nfc_stop_poll(struct nfc_dev *dev) { int rc = 0; - nfc_dbg("dev_name=%s", dev_name(&dev->dev)); + pr_debug("dev_name=%s\n", dev_name(&dev->dev)); device_lock(&dev->dev); @@ -122,6 +181,86 @@ error: return rc; } +int nfc_dep_link_up(struct nfc_dev *dev, int target_index, + u8 comm_mode, u8 rf_mode) +{ + int rc = 0; + + pr_debug("dev_name=%s comm:%d rf:%d\n", + dev_name(&dev->dev), comm_mode, rf_mode); + + if (!dev->ops->dep_link_up) + return -EOPNOTSUPP; + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (dev->dep_link_up == true) { + rc = -EALREADY; + goto error; + } + + rc = dev->ops->dep_link_up(dev, target_index, comm_mode, rf_mode); + +error: + device_unlock(&dev->dev); + return rc; +} + +int nfc_dep_link_down(struct nfc_dev *dev) +{ + int rc = 0; + + pr_debug("dev_name=%s\n", dev_name(&dev->dev)); + + if (!dev->ops->dep_link_down) + return -EOPNOTSUPP; + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (dev->dep_link_up == false) { + rc = -EALREADY; + goto error; + } + + if (dev->dep_rf_mode == NFC_RF_TARGET) { + rc = -EOPNOTSUPP; + goto error; + } + + rc = dev->ops->dep_link_down(dev); + if (!rc) { + dev->dep_link_up = false; + nfc_llcp_mac_is_down(dev); + nfc_genl_dep_link_down_event(dev); + } + +error: + device_unlock(&dev->dev); + return rc; +} + +int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, + u8 comm_mode, u8 rf_mode) +{ + dev->dep_link_up = true; + dev->dep_rf_mode = rf_mode; + + nfc_llcp_mac_is_up(dev, target_idx, comm_mode, rf_mode); + + return nfc_genl_dep_link_up_event(dev, target_idx, comm_mode, rf_mode); +} +EXPORT_SYMBOL(nfc_dep_link_is_up); + /** * nfc_activate_target - prepare the target for data exchange * @@ -133,8 +272,8 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) { int rc; - nfc_dbg("dev_name=%s target_idx=%u protocol=%u", dev_name(&dev->dev), - target_idx, protocol); + pr_debug("dev_name=%s target_idx=%u protocol=%u\n", + dev_name(&dev->dev), target_idx, protocol); device_lock(&dev->dev); @@ -144,6 +283,8 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) } rc = dev->ops->activate_target(dev, target_idx, protocol); + if (!rc) + dev->remote_activated = true; error: device_unlock(&dev->dev); @@ -160,7 +301,8 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx) { int rc = 0; - nfc_dbg("dev_name=%s target_idx=%u", dev_name(&dev->dev), target_idx); + pr_debug("dev_name=%s target_idx=%u\n", + dev_name(&dev->dev), target_idx); device_lock(&dev->dev); @@ -170,6 +312,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx) } dev->ops->deactivate_target(dev, target_idx); + dev->remote_activated = false; error: device_unlock(&dev->dev); @@ -194,8 +337,8 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, { int rc; - nfc_dbg("dev_name=%s target_idx=%u skb->len=%u", dev_name(&dev->dev), - target_idx, skb->len); + pr_debug("dev_name=%s target_idx=%u skb->len=%u\n", + dev_name(&dev->dev), target_idx, skb->len); device_lock(&dev->dev); @@ -212,13 +355,54 @@ error: return rc; } +int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len) +{ + pr_debug("dev_name=%s gb_len=%d\n", + dev_name(&dev->dev), gb_len); + + if (gb_len > NFC_MAX_GT_LEN) + return -EINVAL; + + return nfc_llcp_set_remote_gb(dev, gb, gb_len); +} +EXPORT_SYMBOL(nfc_set_remote_general_bytes); + +u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, u8 *gt_len) +{ + return nfc_llcp_general_bytes(dev, gt_len); +} +EXPORT_SYMBOL(nfc_get_local_general_bytes); + +/** + * nfc_alloc_send_skb - allocate a skb for data exchange responses + * + * @size: size to allocate + * @gfp: gfp flags + */ +struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk, + unsigned int flags, unsigned int size, + unsigned int *err) +{ + struct sk_buff *skb; + unsigned int total_size; + + total_size = size + + dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; + + skb = sock_alloc_send_skb(sk, total_size, flags & MSG_DONTWAIT, err); + if (skb) + skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); + + return skb; +} + /** - * nfc_alloc_skb - allocate a skb for data exchange responses + * nfc_alloc_recv_skb - allocate a skb for data exchange responses * * @size: size to allocate * @gfp: gfp flags */ -struct sk_buff *nfc_alloc_skb(unsigned int size, gfp_t gfp) +struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp) { struct sk_buff *skb; unsigned int total_size; @@ -231,7 +415,7 @@ struct sk_buff *nfc_alloc_skb(unsigned int size, gfp_t gfp) return skb; } -EXPORT_SYMBOL(nfc_alloc_skb); +EXPORT_SYMBOL(nfc_alloc_recv_skb); /** * nfc_targets_found - inform that targets were found @@ -249,7 +433,7 @@ int nfc_targets_found(struct nfc_dev *dev, struct nfc_target *targets, { int i; - nfc_dbg("dev_name=%s n_targets=%d", dev_name(&dev->dev), n_targets); + pr_debug("dev_name=%s n_targets=%d\n", dev_name(&dev->dev), n_targets); dev->polling = false; @@ -283,7 +467,7 @@ static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); - nfc_dbg("dev_name=%s", dev_name(&dev->dev)); + pr_debug("dev_name=%s\n", dev_name(&dev->dev)); nfc_genl_data_exit(&dev->genl_data); kfree(dev->targets); @@ -322,7 +506,9 @@ struct nfc_dev *nfc_get_device(unsigned idx) * @supported_protocols: NFC protocols supported by the device */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, - u32 supported_protocols) + u32 supported_protocols, + int tx_headroom, + int tx_tailroom) { static atomic_t dev_no = ATOMIC_INIT(0); struct nfc_dev *dev; @@ -345,6 +531,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; + dev->tx_headroom = tx_headroom; + dev->tx_tailroom = tx_tailroom; spin_lock_init(&dev->targets_lock); nfc_genl_data_init(&dev->genl_data); @@ -365,7 +553,7 @@ int nfc_register_device(struct nfc_dev *dev) { int rc; - nfc_dbg("dev_name=%s", dev_name(&dev->dev)); + pr_debug("dev_name=%s\n", dev_name(&dev->dev)); mutex_lock(&nfc_devlist_mutex); nfc_devlist_generation++; @@ -375,11 +563,14 @@ int nfc_register_device(struct nfc_dev *dev) if (rc < 0) return rc; - rc = nfc_genl_device_added(dev); + rc = nfc_llcp_register_device(dev); if (rc) - nfc_dbg("The userspace won't be notified that the device %s was" - " added", dev_name(&dev->dev)); + pr_err("Could not register llcp device\n"); + rc = nfc_genl_device_added(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s was added\n", + dev_name(&dev->dev)); return 0; } @@ -394,7 +585,7 @@ void nfc_unregister_device(struct nfc_dev *dev) { int rc; - nfc_dbg("dev_name=%s", dev_name(&dev->dev)); + pr_debug("dev_name=%s\n", dev_name(&dev->dev)); mutex_lock(&nfc_devlist_mutex); nfc_devlist_generation++; @@ -407,10 +598,12 @@ void nfc_unregister_device(struct nfc_dev *dev) mutex_unlock(&nfc_devlist_mutex); + nfc_llcp_unregister_device(dev); + rc = nfc_genl_device_removed(dev); if (rc) - nfc_dbg("The userspace won't be notified that the device %s" - " was removed", dev_name(&dev->dev)); + pr_debug("The userspace won't be notified that the device %s was removed\n", + dev_name(&dev->dev)); } EXPORT_SYMBOL(nfc_unregister_device); @@ -419,7 +612,7 @@ static int __init nfc_init(void) { int rc; - nfc_info("NFC Core ver %s", VERSION); + pr_info("NFC Core ver %s\n", VERSION); rc = class_register(&nfc_class); if (rc) @@ -436,6 +629,10 @@ static int __init nfc_init(void) if (rc) goto err_rawsock; + rc = nfc_llcp_init(); + if (rc) + goto err_llcp_sock; + rc = af_nfc_init(); if (rc) goto err_af_nfc; @@ -443,6 +640,8 @@ static int __init nfc_init(void) return 0; err_af_nfc: + nfc_llcp_exit(); +err_llcp_sock: rawsock_exit(); err_rawsock: nfc_genl_exit(); @@ -454,6 +653,7 @@ err_genl: static void __exit nfc_exit(void) { af_nfc_exit(); + nfc_llcp_exit(); rawsock_exit(); nfc_genl_exit(); class_unregister(&nfc_class); diff --git a/net/nfc/llcp/Kconfig b/net/nfc/llcp/Kconfig new file mode 100644 index 000000000000..fbf5e8150908 --- /dev/null +++ b/net/nfc/llcp/Kconfig @@ -0,0 +1,7 @@ +config NFC_LLCP + depends on NFC && EXPERIMENTAL + bool "NFC LLCP support (EXPERIMENTAL)" + default n + help + Say Y here if you want to build support for a kernel NFC LLCP + implementation.
\ No newline at end of file diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c new file mode 100644 index 000000000000..151f2ef429c4 --- /dev/null +++ b/net/nfc/llcp/commands.c @@ -0,0 +1,399 @@ +/* + * Copyright (C) 2011 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#define pr_fmt(fmt) "llcp: %s: " fmt, __func__ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/nfc.h> + +#include <net/nfc/nfc.h> + +#include "../nfc.h" +#include "llcp.h" + +static u8 llcp_tlv_length[LLCP_TLV_MAX] = { + 0, + 1, /* VERSION */ + 2, /* MIUX */ + 2, /* WKS */ + 1, /* LTO */ + 1, /* RW */ + 0, /* SN */ + 1, /* OPT */ + 0, /* SDREQ */ + 2, /* SDRES */ + +}; + +static u8 llcp_tlv8(u8 *tlv, u8 type) +{ + if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) + return 0; + + return tlv[2]; +} + +static u8 llcp_tlv16(u8 *tlv, u8 type) +{ + if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) + return 0; + + return be16_to_cpu(*((__be16 *)(tlv + 2))); +} + + +static u8 llcp_tlv_version(u8 *tlv) +{ + return llcp_tlv8(tlv, LLCP_TLV_VERSION); +} + +static u16 llcp_tlv_miux(u8 *tlv) +{ + return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7f; +} + +static u16 llcp_tlv_wks(u8 *tlv) +{ + return llcp_tlv16(tlv, LLCP_TLV_WKS); +} + +static u16 llcp_tlv_lto(u8 *tlv) +{ + return llcp_tlv8(tlv, LLCP_TLV_LTO); +} + +static u8 llcp_tlv_opt(u8 *tlv) +{ + return llcp_tlv8(tlv, LLCP_TLV_OPT); +} + +static u8 llcp_tlv_rw(u8 *tlv) +{ + return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf; +} + +u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length) +{ + u8 *tlv, length; + + pr_debug("type %d\n", type); + + if (type >= LLCP_TLV_MAX) + return NULL; + + length = llcp_tlv_length[type]; + if (length == 0 && value_length == 0) + return NULL; + else + length = value_length; + + *tlv_length = 2 + length; + tlv = kzalloc(2 + length, GFP_KERNEL); + if (tlv == NULL) + return tlv; + + tlv[0] = type; + tlv[1] = length; + memcpy(tlv + 2, value, length); + + return tlv; +} + +int nfc_llcp_parse_tlv(struct nfc_llcp_local *local, + u8 *tlv_array, u16 tlv_array_len) +{ + u8 *tlv = tlv_array, type, length, offset = 0; + + pr_debug("TLV array length %d\n", tlv_array_len); + + if (local == NULL) + return -ENODEV; + + while (offset < tlv_array_len) { + type = tlv[0]; + length = tlv[1]; + + pr_debug("type 0x%x length %d\n", type, length); + + switch (type) { + case LLCP_TLV_VERSION: + local->remote_version = llcp_tlv_version(tlv); + break; + case LLCP_TLV_MIUX: + local->remote_miu = llcp_tlv_miux(tlv) + 128; + break; + case LLCP_TLV_WKS: + local->remote_wks = llcp_tlv_wks(tlv); + break; + case LLCP_TLV_LTO: + local->remote_lto = llcp_tlv_lto(tlv) * 10; + break; + case LLCP_TLV_OPT: + local->remote_opt = llcp_tlv_opt(tlv); + break; + case LLCP_TLV_RW: + local->remote_rw = llcp_tlv_rw(tlv); + break; + default: + pr_err("Invalid gt tlv value 0x%x\n", type); + break; + } + + offset += length + 2; + tlv += length + 2; + } + + pr_debug("version 0x%x miu %d lto %d opt 0x%x wks 0x%x rw %d\n", + local->remote_version, local->remote_miu, + local->remote_lto, local->remote_opt, + local->remote_wks, local->remote_rw); + + return 0; +} + +static struct sk_buff *llcp_add_header(struct sk_buff *pdu, + u8 dsap, u8 ssap, u8 ptype) +{ + u8 header[2]; + + pr_debug("ptype 0x%x dsap 0x%x ssap 0x%x\n", ptype, dsap, ssap); + + header[0] = (u8)((dsap << 2) | (ptype >> 2)); + header[1] = (u8)((ptype << 6) | ssap); + + pr_debug("header 0x%x 0x%x\n", header[0], header[1]); + + memcpy(skb_put(pdu, LLCP_HEADER_SIZE), header, LLCP_HEADER_SIZE); + + return pdu; +} + +static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv, u8 tlv_length) +{ + /* XXX Add an skb length check */ + + if (tlv == NULL) + return NULL; + + memcpy(skb_put(pdu, tlv_length), tlv, tlv_length); + + return pdu; +} + +static struct sk_buff *llcp_allocate_pdu(struct nfc_llcp_sock *sock, + u8 cmd, u16 size) +{ + struct sk_buff *skb; + int err; + + if (sock->ssap == 0) + return NULL; + + skb = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT, + size + LLCP_HEADER_SIZE, &err); + if (skb == NULL) { + pr_err("Could not allocate PDU\n"); + return NULL; + } + + skb = llcp_add_header(skb, sock->dsap, sock->ssap, cmd); + + return skb; +} + +int nfc_llcp_disconnect(struct nfc_llcp_sock *sock) +{ + struct sk_buff *skb; + struct nfc_dev *dev; + struct nfc_llcp_local *local; + u16 size = 0; + + pr_debug("Sending DISC\n"); + + local = sock->local; + if (local == NULL) + return -ENODEV; + + dev = sock->dev; + if (dev == NULL) + return -ENODEV; + + size += LLCP_HEADER_SIZE; + size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; + + skb = alloc_skb(size, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); + + skb = llcp_add_header(skb, sock->ssap, sock->dsap, LLCP_PDU_DISC); + + skb_queue_tail(&local->tx_queue, skb); + + return 0; +} + +int nfc_llcp_send_symm(struct nfc_dev *dev) +{ + struct sk_buff *skb; + struct nfc_llcp_local *local; + u16 size = 0; + + pr_debug("Sending SYMM\n"); + + local = nfc_llcp_find_local(dev); + if (local == NULL) + return -ENODEV; + + size += LLCP_HEADER_SIZE; + size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; + + skb = alloc_skb(size, GFP_KERNEL); + if (skb == NULL) + return -ENOMEM; + + skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); + + skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM); + + return nfc_data_exchange(dev, local->target_idx, skb, + nfc_llcp_recv, local); +} + +int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) +{ + struct nfc_llcp_local *local; + struct sk_buff *skb; + u8 *service_name_tlv = NULL, service_name_tlv_length; + int err; + u16 size = 0; + + pr_debug("Sending CONNECT\n"); + + local = sock->local; + if (local == NULL) + return -ENODEV; + + if (sock->service_name != NULL) { + service_name_tlv = nfc_llcp_build_tlv(LLCP_TLV_SN, + sock->service_name, + sock->service_name_len, + &service_name_tlv_length); + size += service_name_tlv_length; + } + + pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); + + skb = llcp_allocate_pdu(sock, LLCP_PDU_CONNECT, size); + if (skb == NULL) { + err = -ENOMEM; + goto error_tlv; + } + + if (service_name_tlv != NULL) + skb = llcp_add_tlv(skb, service_name_tlv, + service_name_tlv_length); + + skb_queue_tail(&local->tx_queue, skb); + + return 0; + +error_tlv: + pr_err("error %d\n", err); + + kfree(service_name_tlv); + + return err; +} + +int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) +{ + struct nfc_llcp_local *local; + struct sk_buff *skb; + + pr_debug("Sending CC\n"); + + local = sock->local; + if (local == NULL) + return -ENODEV; + + skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, 0); + if (skb == NULL) + return -ENOMEM; + + skb_queue_tail(&local->tx_queue, skb); + + return 0; +} + +int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason) +{ + struct sk_buff *skb; + struct nfc_dev *dev; + u16 size = 1; /* Reason code */ + + pr_debug("Sending DM reason 0x%x\n", reason); + + if (local == NULL) + return -ENODEV; + + dev = local->dev; + if (dev == NULL) + return -ENODEV; + + size += LLCP_HEADER_SIZE; + size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; + + skb = alloc_skb(size, GFP_KERNEL); + if (skb == NULL) + return -ENOMEM; + + skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); + + skb = llcp_add_header(skb, ssap, dsap, LLCP_PDU_DM); + + memcpy(skb_put(skb, 1), &reason, 1); + + skb_queue_head(&local->tx_queue, skb); + + return 0; +} + +int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) +{ + struct sk_buff *skb; + struct nfc_llcp_local *local; + + pr_debug("Send DISC\n"); + + local = sock->local; + if (local == NULL) + return -ENODEV; + + skb = llcp_allocate_pdu(sock, LLCP_PDU_DISC, 0); + if (skb == NULL) + return -ENOMEM; + + skb_queue_head(&local->tx_queue, skb); + + return 0; +} diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c new file mode 100644 index 000000000000..1d32680807d6 --- /dev/null +++ b/net/nfc/llcp/llcp.c @@ -0,0 +1,971 @@ +/* + * Copyright (C) 2011 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#define pr_fmt(fmt) "llcp: %s: " fmt, __func__ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/nfc.h> + +#include "../nfc.h" +#include "llcp.h" + +static u8 llcp_magic[3] = {0x46, 0x66, 0x6d}; + +static struct list_head llcp_devices; + +static void nfc_llcp_socket_release(struct nfc_llcp_local *local) +{ + struct nfc_llcp_sock *parent, *s, *n; + struct sock *sk, *parent_sk; + int i; + + + mutex_lock(&local->socket_lock); + + for (i = 0; i < LLCP_MAX_SAP; i++) { + parent = local->sockets[i]; + if (parent == NULL) + continue; + + /* Release all child sockets */ + list_for_each_entry_safe(s, n, &parent->list, list) { + list_del(&s->list); + sk = &s->sk; + + lock_sock(sk); + + if (sk->sk_state == LLCP_CONNECTED) + nfc_put_device(s->dev); + + sk->sk_state = LLCP_CLOSED; + sock_set_flag(sk, SOCK_DEAD); + + release_sock(sk); + } + + parent_sk = &parent->sk; + + lock_sock(parent_sk); + + if (parent_sk->sk_state == LLCP_LISTEN) { + struct nfc_llcp_sock *lsk, *n; + struct sock *accept_sk; + + list_for_each_entry_safe(lsk, n, &parent->accept_queue, + accept_queue) { + accept_sk = &lsk->sk; + lock_sock(accept_sk); + + nfc_llcp_accept_unlink(accept_sk); + + accept_sk->sk_state = LLCP_CLOSED; + sock_set_flag(accept_sk, SOCK_DEAD); + + release_sock(accept_sk); + + sock_orphan(accept_sk); + } + } + + if (parent_sk->sk_state == LLCP_CONNECTED) + nfc_put_device(parent->dev); + + parent_sk->sk_state = LLCP_CLOSED; + sock_set_flag(parent_sk, SOCK_DEAD); + + release_sock(parent_sk); + } + + mutex_unlock(&local->socket_lock); +} + +static void nfc_llcp_timeout_work(struct work_struct *work) +{ + struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, + timeout_work); + + nfc_dep_link_down(local->dev); +} + +static void nfc_llcp_symm_timer(unsigned long data) +{ + struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + + pr_err("SYMM timeout\n"); + + queue_work(local->timeout_wq, &local->timeout_work); +} + +struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) +{ + struct nfc_llcp_local *local, *n; + + list_for_each_entry_safe(local, n, &llcp_devices, list) + if (local->dev == dev) + return local; + + pr_debug("No device found\n"); + + return NULL; +} + +static char *wks[] = { + NULL, + NULL, /* SDP */ + "urn:nfc:sn:ip", + "urn:nfc:sn:obex", + "urn:nfc:sn:snep", +}; + +static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len) +{ + int sap, num_wks; + + pr_debug("%s\n", service_name); + + if (service_name == NULL) + return -EINVAL; + + num_wks = ARRAY_SIZE(wks); + + for (sap = 0 ; sap < num_wks; sap++) { + if (wks[sap] == NULL) + continue; + + if (strncmp(wks[sap], service_name, service_name_len) == 0) + return sap; + } + + return -EINVAL; +} + +u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, + struct nfc_llcp_sock *sock) +{ + mutex_lock(&local->sdp_lock); + + if (sock->service_name != NULL && sock->service_name_len > 0) { + int ssap = nfc_llcp_wks_sap(sock->service_name, + sock->service_name_len); + + if (ssap > 0) { + pr_debug("WKS %d\n", ssap); + + /* This is a WKS, let's check if it's free */ + if (local->local_wks & BIT(ssap)) { + mutex_unlock(&local->sdp_lock); + + return LLCP_SAP_MAX; + } + + set_bit(BIT(ssap), &local->local_wks); + mutex_unlock(&local->sdp_lock); + + return ssap; + } + + /* + * This is not a well known service, + * we should try to find a local SDP free spot + */ + ssap = find_first_zero_bit(&local->local_sdp, LLCP_SDP_NUM_SAP); + if (ssap == LLCP_SDP_NUM_SAP) { + mutex_unlock(&local->sdp_lock); + + return LLCP_SAP_MAX; + } + + pr_debug("SDP ssap %d\n", LLCP_WKS_NUM_SAP + ssap); + + set_bit(BIT(ssap), &local->local_sdp); + mutex_unlock(&local->sdp_lock); + + return LLCP_WKS_NUM_SAP + ssap; + + } else if (sock->ssap != 0) { + if (sock->ssap < LLCP_WKS_NUM_SAP) { + if (!(local->local_wks & BIT(sock->ssap))) { + set_bit(BIT(sock->ssap), &local->local_wks); + mutex_unlock(&local->sdp_lock); + + return sock->ssap; + } + + } else if (sock->ssap < LLCP_SDP_NUM_SAP) { + if (!(local->local_sdp & + BIT(sock->ssap - LLCP_WKS_NUM_SAP))) { + set_bit(BIT(sock->ssap - LLCP_WKS_NUM_SAP), + &local->local_sdp); + mutex_unlock(&local->sdp_lock); + + return sock->ssap; + } + } + } + + mutex_unlock(&local->sdp_lock); + + return LLCP_SAP_MAX; +} + +u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local) +{ + u8 local_ssap; + + mutex_lock(&local->sdp_lock); + + local_ssap = find_first_zero_bit(&local->local_sap, LLCP_LOCAL_NUM_SAP); + if (local_ssap == LLCP_LOCAL_NUM_SAP) { + mutex_unlock(&local->sdp_lock); + return LLCP_SAP_MAX; + } + + set_bit(BIT(local_ssap), &local->local_sap); + + mutex_unlock(&local->sdp_lock); + + return local_ssap + LLCP_LOCAL_SAP_OFFSET; +} + +void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap) +{ + u8 local_ssap; + unsigned long *sdp; + + if (ssap < LLCP_WKS_NUM_SAP) { + local_ssap = ssap; + sdp = &local->local_wks; + } else if (ssap < LLCP_LOCAL_NUM_SAP) { + local_ssap = ssap - LLCP_WKS_NUM_SAP; + sdp = &local->local_sdp; + } else if (ssap < LLCP_MAX_SAP) { + local_ssap = ssap - LLCP_LOCAL_NUM_SAP; + sdp = &local->local_sap; + } else { + return; + } + + mutex_lock(&local->sdp_lock); + + clear_bit(1 << local_ssap, sdp); + + mutex_unlock(&local->sdp_lock); +} + +u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, u8 *general_bytes_len) +{ + struct nfc_llcp_local *local; + + local = nfc_llcp_find_local(dev); + if (local == NULL) { + *general_bytes_len = 0; + return NULL; + } + + *general_bytes_len = local->gb_len; + + return local->gb; +} + +static int nfc_llcp_build_gb(struct nfc_llcp_local *local) +{ + u8 *gb_cur, *version_tlv, version, version_length; + u8 *lto_tlv, lto, lto_length; + u8 *wks_tlv, wks_length; + u8 gb_len = 0; + + version = LLCP_VERSION_11; + version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version, + 1, &version_length); + gb_len += version_length; + + /* 1500 ms */ + lto = 150; + lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, <o, 1, <o_length); + gb_len += lto_length; + + pr_debug("Local wks 0x%lx\n", local->local_wks); + wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&local->local_wks, 2, + &wks_length); + gb_len += wks_length; + + gb_len += ARRAY_SIZE(llcp_magic); + + if (gb_len > NFC_MAX_GT_LEN) { + kfree(version_tlv); + return -EINVAL; + } + + gb_cur = local->gb; + + memcpy(gb_cur, llcp_magic, ARRAY_SIZE(llcp_magic)); + gb_cur += ARRAY_SIZE(llcp_magic); + + memcpy(gb_cur, version_tlv, version_length); + gb_cur += version_length; + + memcpy(gb_cur, lto_tlv, lto_length); + gb_cur += lto_length; + + memcpy(gb_cur, wks_tlv, wks_length); + gb_cur += wks_length; + + kfree(version_tlv); + kfree(lto_tlv); + + local->gb_len = gb_len; + + return 0; +} + +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) +{ + struct nfc_llcp_local *local = nfc_llcp_find_local(dev); + + if (local == NULL) { + pr_err("No LLCP device\n"); + return -ENODEV; + } + + memset(local->remote_gb, 0, NFC_MAX_GT_LEN); + memcpy(local->remote_gb, gb, gb_len); + local->remote_gb_len = gb_len; + + if (local->remote_gb == NULL || + local->remote_gb_len == 0) + return -ENODEV; + + if (memcmp(local->remote_gb, llcp_magic, 3)) { + pr_err("MAC does not support LLCP\n"); + return -EINVAL; + } + + return nfc_llcp_parse_tlv(local, + &local->remote_gb[3], local->remote_gb_len - 3); +} + +static void nfc_llcp_tx_work(struct work_struct *work) +{ + struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, + tx_work); + struct sk_buff *skb; + + skb = skb_dequeue(&local->tx_queue); + if (skb != NULL) { + pr_debug("Sending pending skb\n"); + nfc_data_exchange(local->dev, local->target_idx, + skb, nfc_llcp_recv, local); + } else { + nfc_llcp_send_symm(local->dev); + } + + mod_timer(&local->link_timer, + jiffies + msecs_to_jiffies(local->remote_lto)); +} + +static u8 nfc_llcp_dsap(struct sk_buff *pdu) +{ + return (pdu->data[0] & 0xfc) >> 2; +} + +static u8 nfc_llcp_ptype(struct sk_buff *pdu) +{ + return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6); +} + +static u8 nfc_llcp_ssap(struct sk_buff *pdu) +{ + return pdu->data[1] & 0x3f; +} + +static u8 nfc_llcp_ns(struct sk_buff *pdu) +{ + return pdu->data[2] >> 4; +} + +static u8 nfc_llcp_nr(struct sk_buff *pdu) +{ + return pdu->data[2] & 0xf; +} + +static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu) +{ + pdu->data[2] = (sock->send_n << 4) | ((sock->recv_n - 1) % 16); + sock->send_n = (sock->send_n + 1) % 16; + sock->recv_ack_n = (sock->recv_n - 1) % 16; +} + +static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, + u8 ssap, u8 dsap) +{ + struct nfc_llcp_sock *sock, *llcp_sock, *n; + + if (ssap == 0 && dsap == 0) + return NULL; + + mutex_lock(&local->socket_lock); + sock = local->sockets[ssap]; + if (sock == NULL) { + mutex_unlock(&local->socket_lock); + return NULL; + } + + pr_debug("root dsap %d (%d)\n", sock->dsap, dsap); + + if (sock->dsap == dsap) { + sock_hold(&sock->sk); + mutex_unlock(&local->socket_lock); + return sock; + } + + list_for_each_entry_safe(llcp_sock, n, &sock->list, list) { + pr_debug("llcp_sock %p sk %p dsap %d\n", llcp_sock, + &llcp_sock->sk, llcp_sock->dsap); + if (llcp_sock->dsap == dsap) { + sock_hold(&llcp_sock->sk); + mutex_unlock(&local->socket_lock); + return llcp_sock; + } + } + + pr_err("Could not find socket for %d %d\n", ssap, dsap); + + mutex_unlock(&local->socket_lock); + + return NULL; +} + +static void nfc_llcp_sock_put(struct nfc_llcp_sock *sock) +{ + sock_put(&sock->sk); +} + +static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len) +{ + u8 *tlv = &skb->data[2], type, length; + size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0; + + while (offset < tlv_array_len) { + type = tlv[0]; + length = tlv[1]; + + pr_debug("type 0x%x length %d\n", type, length); + + if (type == LLCP_TLV_SN) { + *sn_len = length; + return &tlv[2]; + } + + offset += length + 2; + tlv += length + 2; + } + + return NULL; +} + +static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, + struct sk_buff *skb) +{ + struct sock *new_sk, *parent; + struct nfc_llcp_sock *sock, *new_sock; + u8 dsap, ssap, bound_sap, reason; + + dsap = nfc_llcp_dsap(skb); + ssap = nfc_llcp_ssap(skb); + + pr_debug("%d %d\n", dsap, ssap); + + nfc_llcp_parse_tlv(local, &skb->data[LLCP_HEADER_SIZE], + skb->len - LLCP_HEADER_SIZE); + + if (dsap != LLCP_SAP_SDP) { + bound_sap = dsap; + + mutex_lock(&local->socket_lock); + sock = local->sockets[dsap]; + if (sock == NULL) { + mutex_unlock(&local->socket_lock); + reason = LLCP_DM_NOBOUND; + goto fail; + } + + sock_hold(&sock->sk); + mutex_unlock(&local->socket_lock); + + lock_sock(&sock->sk); + + if (sock->dsap == LLCP_SAP_SDP && + sock->sk.sk_state == LLCP_LISTEN) + goto enqueue; + } else { + u8 *sn; + size_t sn_len; + + sn = nfc_llcp_connect_sn(skb, &sn_len); + if (sn == NULL) { + reason = LLCP_DM_NOBOUND; + goto fail; + } + + pr_debug("Service name length %zu\n", sn_len); + + mutex_lock(&local->socket_lock); + for (bound_sap = 0; bound_sap < LLCP_LOCAL_SAP_OFFSET; + bound_sap++) { + sock = local->sockets[bound_sap]; + if (sock == NULL) + continue; + + if (sock->service_name == NULL || + sock->service_name_len == 0) + continue; + + if (sock->service_name_len != sn_len) + continue; + + if (sock->dsap == LLCP_SAP_SDP && + sock->sk.sk_state == LLCP_LISTEN && + !memcmp(sn, sock->service_name, sn_len)) { + pr_debug("Found service name at SAP %d\n", + bound_sap); + sock_hold(&sock->sk); + mutex_unlock(&local->socket_lock); + + lock_sock(&sock->sk); + + goto enqueue; + } + } + mutex_unlock(&local->socket_lock); + } + + reason = LLCP_DM_NOBOUND; + goto fail; + +enqueue: + parent = &sock->sk; + + if (sk_acceptq_is_full(parent)) { + reason = LLCP_DM_REJ; + release_sock(&sock->sk); + sock_put(&sock->sk); + goto fail; + } + + new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, + GFP_ATOMIC); + if (new_sk == NULL) { + reason = LLCP_DM_REJ; + release_sock(&sock->sk); + sock_put(&sock->sk); + goto fail; + } + + new_sock = nfc_llcp_sock(new_sk); + new_sock->dev = local->dev; + new_sock->local = local; + new_sock->nfc_protocol = sock->nfc_protocol; + new_sock->ssap = bound_sap; + new_sock->dsap = ssap; + new_sock->parent = parent; + + pr_debug("new sock %p sk %p\n", new_sock, &new_sock->sk); + + list_add_tail(&new_sock->list, &sock->list); + + nfc_llcp_accept_enqueue(&sock->sk, new_sk); + + nfc_get_device(local->dev->idx); + + new_sk->sk_state = LLCP_CONNECTED; + + /* Wake the listening processes */ + parent->sk_data_ready(parent, 0); + + /* Send CC */ + nfc_llcp_send_cc(new_sock); + + release_sock(&sock->sk); + sock_put(&sock->sk); + + return; + +fail: + /* Send DM */ + nfc_llcp_send_dm(local, dsap, ssap, reason); + + return; + +} + +static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, + struct sk_buff *skb) +{ + struct nfc_llcp_sock *llcp_sock; + struct sock *sk; + u8 dsap, ssap, ptype, ns, nr; + + ptype = nfc_llcp_ptype(skb); + dsap = nfc_llcp_dsap(skb); + ssap = nfc_llcp_ssap(skb); + ns = nfc_llcp_ns(skb); + nr = nfc_llcp_nr(skb); + + pr_debug("%d %d R %d S %d\n", dsap, ssap, nr, ns); + + llcp_sock = nfc_llcp_sock_get(local, dsap, ssap); + if (llcp_sock == NULL) { + nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN); + return; + } + + sk = &llcp_sock->sk; + lock_sock(sk); + if (sk->sk_state == LLCP_CLOSED) { + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); + } + + if (ns == llcp_sock->recv_n) + llcp_sock->recv_n = (llcp_sock->recv_n + 1) % 16; + else + pr_err("Received out of sequence I PDU\n"); + + /* Pass the payload upstream */ + if (ptype == LLCP_PDU_I) { + pr_debug("I frame, queueing on %p\n", &llcp_sock->sk); + + skb_pull(skb, LLCP_HEADER_SIZE + LLCP_SEQUENCE_SIZE); + if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) { + pr_err("receive queue is full\n"); + skb_queue_head(&llcp_sock->tx_backlog_queue, skb); + } + } + + /* Remove skbs from the pending queue */ + if (llcp_sock->send_ack_n != nr) { + struct sk_buff *s, *tmp; + + llcp_sock->send_ack_n = nr; + + skb_queue_walk_safe(&llcp_sock->tx_pending_queue, s, tmp) + if (nfc_llcp_ns(s) <= nr) { + skb_unlink(s, &llcp_sock->tx_pending_queue); + kfree_skb(s); + } + } + + /* Queue some I frames for transmission */ + while (llcp_sock->remote_ready && + skb_queue_len(&llcp_sock->tx_pending_queue) <= local->remote_rw) { + struct sk_buff *pdu, *pending_pdu; + + pdu = skb_dequeue(&llcp_sock->tx_queue); + if (pdu == NULL) + break; + + /* Update N(S)/N(R) */ + nfc_llcp_set_nrns(llcp_sock, pdu); + + pending_pdu = skb_clone(pdu, GFP_KERNEL); + + skb_queue_tail(&local->tx_queue, pdu); + skb_queue_tail(&llcp_sock->tx_pending_queue, pending_pdu); + } + + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); +} + +static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, + struct sk_buff *skb) +{ + struct nfc_llcp_sock *llcp_sock; + struct sock *sk; + u8 dsap, ssap; + + dsap = nfc_llcp_dsap(skb); + ssap = nfc_llcp_ssap(skb); + + llcp_sock = nfc_llcp_sock_get(local, dsap, ssap); + if (llcp_sock == NULL) { + nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN); + return; + } + + sk = &llcp_sock->sk; + lock_sock(sk); + if (sk->sk_state == LLCP_CLOSED) { + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); + } + + + if (sk->sk_state == LLCP_CONNECTED) { + nfc_put_device(local->dev); + sk->sk_state = LLCP_CLOSED; + sk->sk_state_change(sk); + } + + nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_DISC); + + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); +} + +static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, + struct sk_buff *skb) +{ + struct nfc_llcp_sock *llcp_sock; + u8 dsap, ssap; + + + dsap = nfc_llcp_dsap(skb); + ssap = nfc_llcp_ssap(skb); + + llcp_sock = nfc_llcp_sock_get(local, dsap, ssap); + + if (llcp_sock == NULL) + llcp_sock = nfc_llcp_sock_get(local, dsap, LLCP_SAP_SDP); + + if (llcp_sock == NULL) { + pr_err("Invalid CC\n"); + nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN); + + return; + } + + llcp_sock->dsap = ssap; + + nfc_llcp_parse_tlv(local, &skb->data[LLCP_HEADER_SIZE], + skb->len - LLCP_HEADER_SIZE); + + nfc_llcp_sock_put(llcp_sock); +} + +static void nfc_llcp_rx_work(struct work_struct *work) +{ + struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, + rx_work); + u8 dsap, ssap, ptype; + struct sk_buff *skb; + + skb = local->rx_pending; + if (skb == NULL) { + pr_debug("No pending SKB\n"); + return; + } + + ptype = nfc_llcp_ptype(skb); + dsap = nfc_llcp_dsap(skb); + ssap = nfc_llcp_ssap(skb); + + pr_debug("ptype 0x%x dsap 0x%x ssap 0x%x\n", ptype, dsap, ssap); + + switch (ptype) { + case LLCP_PDU_SYMM: + pr_debug("SYMM\n"); + break; + + case LLCP_PDU_CONNECT: + pr_debug("CONNECT\n"); + nfc_llcp_recv_connect(local, skb); + break; + + case LLCP_PDU_DISC: + pr_debug("DISC\n"); + nfc_llcp_recv_disc(local, skb); + break; + + case LLCP_PDU_CC: + pr_debug("CC\n"); + nfc_llcp_recv_cc(local, skb); + break; + + case LLCP_PDU_I: + case LLCP_PDU_RR: + pr_debug("I frame\n"); + nfc_llcp_recv_hdlc(local, skb); + break; + + } + + queue_work(local->tx_wq, &local->tx_work); + kfree_skb(local->rx_pending); + local->rx_pending = NULL; + + return; +} + +void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) +{ + struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + + pr_debug("Received an LLCP PDU\n"); + if (err < 0) { + pr_err("err %d", err); + return; + } + + local->rx_pending = skb_get(skb); + del_timer(&local->link_timer); + queue_work(local->rx_wq, &local->rx_work); + + return; +} + +void nfc_llcp_mac_is_down(struct nfc_dev *dev) +{ + struct nfc_llcp_local *local; + + local = nfc_llcp_find_local(dev); + if (local == NULL) + return; + + /* Close and purge all existing sockets */ + nfc_llcp_socket_release(local); +} + +void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, + u8 comm_mode, u8 rf_mode) +{ + struct nfc_llcp_local *local; + + pr_debug("rf mode %d\n", rf_mode); + + local = nfc_llcp_find_local(dev); + if (local == NULL) + return; + + local->target_idx = target_idx; + local->comm_mode = comm_mode; + local->rf_mode = rf_mode; + + if (rf_mode == NFC_RF_INITIATOR) { + pr_debug("Queueing Tx work\n"); + + queue_work(local->tx_wq, &local->tx_work); + } else { + mod_timer(&local->link_timer, + jiffies + msecs_to_jiffies(local->remote_lto)); + } +} + +int nfc_llcp_register_device(struct nfc_dev *ndev) +{ + struct device *dev = &ndev->dev; + struct nfc_llcp_local *local; + char name[32]; + int err; + + local = kzalloc(sizeof(struct nfc_llcp_local), GFP_KERNEL); + if (local == NULL) + return -ENOMEM; + + local->dev = ndev; + INIT_LIST_HEAD(&local->list); + mutex_init(&local->sdp_lock); + mutex_init(&local->socket_lock); + init_timer(&local->link_timer); + local->link_timer.data = (unsigned long) local; + local->link_timer.function = nfc_llcp_symm_timer; + + skb_queue_head_init(&local->tx_queue); + INIT_WORK(&local->tx_work, nfc_llcp_tx_work); + snprintf(name, sizeof(name), "%s_llcp_tx_wq", dev_name(dev)); + local->tx_wq = alloc_workqueue(name, + WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, 1); + if (local->tx_wq == NULL) { + err = -ENOMEM; + goto err_local; + } + + local->rx_pending = NULL; + INIT_WORK(&local->rx_work, nfc_llcp_rx_work); + snprintf(name, sizeof(name), "%s_llcp_rx_wq", dev_name(dev)); + local->rx_wq = alloc_workqueue(name, + WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, 1); + if (local->rx_wq == NULL) { + err = -ENOMEM; + goto err_tx_wq; + } + + INIT_WORK(&local->timeout_work, nfc_llcp_timeout_work); + snprintf(name, sizeof(name), "%s_llcp_timeout_wq", dev_name(dev)); + local->timeout_wq = alloc_workqueue(name, + WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, 1); + if (local->timeout_wq == NULL) { + err = -ENOMEM; + goto err_rx_wq; + } + + nfc_llcp_build_gb(local); + + local->remote_miu = LLCP_DEFAULT_MIU; + local->remote_lto = LLCP_DEFAULT_LTO; + local->remote_rw = LLCP_DEFAULT_RW; + + list_add(&llcp_devices, &local->list); + + return 0; + +err_rx_wq: + destroy_workqueue(local->rx_wq); + +err_tx_wq: + destroy_workqueue(local->tx_wq); + +err_local: + kfree(local); + + return 0; +} + +void nfc_llcp_unregister_device(struct nfc_dev *dev) +{ + struct nfc_llcp_local *local = nfc_llcp_find_local(dev); + + if (local == NULL) { + pr_debug("No such device\n"); + return; + } + + list_del(&local->list); + nfc_llcp_socket_release(local); + del_timer_sync(&local->link_timer); + skb_queue_purge(&local->tx_queue); + destroy_workqueue(local->tx_wq); + destroy_workqueue(local->rx_wq); + kfree_skb(local->rx_pending); + kfree(local); +} + +int __init nfc_llcp_init(void) +{ + INIT_LIST_HEAD(&llcp_devices); + + return nfc_llcp_sock_init(); +} + +void nfc_llcp_exit(void) +{ + nfc_llcp_sock_exit(); +} diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h new file mode 100644 index 000000000000..0ad2e3361584 --- /dev/null +++ b/net/nfc/llcp/llcp.h @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2011 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +enum llcp_state { + LLCP_CONNECTED = 1, /* wait_for_packet() wants that */ + LLCP_CLOSED, + LLCP_BOUND, + LLCP_LISTEN, +}; + +#define LLCP_DEFAULT_LTO 100 +#define LLCP_DEFAULT_RW 1 +#define LLCP_DEFAULT_MIU 128 + +#define LLCP_WKS_NUM_SAP 16 +#define LLCP_SDP_NUM_SAP 16 +#define LLCP_LOCAL_NUM_SAP 32 +#define LLCP_LOCAL_SAP_OFFSET (LLCP_WKS_NUM_SAP + LLCP_SDP_NUM_SAP) +#define LLCP_MAX_SAP (LLCP_WKS_NUM_SAP + LLCP_SDP_NUM_SAP + LLCP_LOCAL_NUM_SAP) + +struct nfc_llcp_sock; + +struct nfc_llcp_local { + struct list_head list; + struct nfc_dev *dev; + + struct mutex sdp_lock; + struct mutex socket_lock; + + struct timer_list link_timer; + struct sk_buff_head tx_queue; + struct workqueue_struct *tx_wq; + struct work_struct tx_work; + struct workqueue_struct *rx_wq; + struct work_struct rx_work; + struct sk_buff *rx_pending; + struct workqueue_struct *timeout_wq; + struct work_struct timeout_work; + + u32 target_idx; + u8 rf_mode; + u8 comm_mode; + unsigned long local_wks; /* Well known services */ + unsigned long local_sdp; /* Local services */ + unsigned long local_sap; /* Local SAPs, not available for discovery */ + + /* local */ + u8 gb[NFC_MAX_GT_LEN]; + u8 gb_len; + + /* remote */ + u8 remote_gb[NFC_MAX_GT_LEN]; + u8 remote_gb_len; + + u8 remote_version; + u16 remote_miu; + u16 remote_lto; + u8 remote_opt; + u16 remote_wks; + u8 remote_rw; + + /* sockets array */ + struct nfc_llcp_sock *sockets[LLCP_MAX_SAP]; +}; + +struct nfc_llcp_sock { + struct sock sk; + struct list_head list; + struct nfc_dev *dev; + struct nfc_llcp_local *local; + u32 target_idx; + u32 nfc_protocol; + + u8 ssap; + u8 dsap; + char *service_name; + size_t service_name_len; + + /* Link variables */ + u8 send_n; + u8 send_ack_n; + u8 recv_n; + u8 recv_ack_n; + + /* Is the remote peer ready to receive */ + u8 remote_ready; + + struct sk_buff_head tx_queue; + struct sk_buff_head tx_pending_queue; + struct sk_buff_head tx_backlog_queue; + + struct list_head accept_queue; + struct sock *parent; +}; + +#define nfc_llcp_sock(sk) ((struct nfc_llcp_sock *) (sk)) +#define nfc_llcp_dev(sk) (nfc_llcp_sock((sk))->dev) + +#define LLCP_HEADER_SIZE 2 +#define LLCP_SEQUENCE_SIZE 1 + +/* LLCP versions: 1.1 is 1.0 plus SDP */ +#define LLCP_VERSION_10 0x10 +#define LLCP_VERSION_11 0x11 + +/* LLCP PDU types */ +#define LLCP_PDU_SYMM 0x0 +#define LLCP_PDU_PAX 0x1 +#define LLCP_PDU_AGF 0x2 +#define LLCP_PDU_UI 0x3 +#define LLCP_PDU_CONNECT 0x4 +#define LLCP_PDU_DISC 0x5 +#define LLCP_PDU_CC 0x6 +#define LLCP_PDU_DM 0x7 +#define LLCP_PDU_FRMR 0x8 +#define LLCP_PDU_SNL 0x9 +#define LLCP_PDU_I 0xc +#define LLCP_PDU_RR 0xd +#define LLCP_PDU_RNR 0xe + +/* Parameters TLV types */ +#define LLCP_TLV_VERSION 0x1 +#define LLCP_TLV_MIUX 0x2 +#define LLCP_TLV_WKS 0x3 +#define LLCP_TLV_LTO 0x4 +#define LLCP_TLV_RW 0x5 +#define LLCP_TLV_SN 0x6 +#define LLCP_TLV_OPT 0x7 +#define LLCP_TLV_SDREQ 0x8 +#define LLCP_TLV_SDRES 0x9 +#define LLCP_TLV_MAX 0xa + +/* Well known LLCP SAP */ +#define LLCP_SAP_SDP 0x1 +#define LLCP_SAP_IP 0x2 +#define LLCP_SAP_OBEX 0x3 +#define LLCP_SAP_SNEP 0x4 +#define LLCP_SAP_MAX 0xff + +/* Disconnection reason code */ +#define LLCP_DM_DISC 0x00 +#define LLCP_DM_NOCONN 0x01 +#define LLCP_DM_NOBOUND 0x02 +#define LLCP_DM_REJ 0x03 + + +struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); +u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, + struct nfc_llcp_sock *sock); +u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local); +void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap); + +/* Sock API */ +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp); +void nfc_llcp_sock_free(struct nfc_llcp_sock *sock); +void nfc_llcp_accept_unlink(struct sock *sk); +void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk); +struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock); + +/* TLV API */ +int nfc_llcp_parse_tlv(struct nfc_llcp_local *local, + u8 *tlv_array, u16 tlv_array_len); + +/* Commands API */ +void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); +u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); +void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); +int nfc_llcp_disconnect(struct nfc_llcp_sock *sock); +int nfc_llcp_send_symm(struct nfc_dev *dev); +int nfc_llcp_send_connect(struct nfc_llcp_sock *sock); +int nfc_llcp_send_cc(struct nfc_llcp_sock *sock); +int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason); +int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock); + +/* Socket API */ +int __init nfc_llcp_sock_init(void); +void nfc_llcp_sock_exit(void); diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c new file mode 100644 index 000000000000..f738ccd535f1 --- /dev/null +++ b/net/nfc/llcp/sock.c @@ -0,0 +1,675 @@ +/* + * Copyright (C) 2011 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#define pr_fmt(fmt) "llcp: %s: " fmt, __func__ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/nfc.h> + +#include "../nfc.h" +#include "llcp.h" + +static struct proto llcp_sock_proto = { + .name = "NFC_LLCP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct nfc_llcp_sock), +}; + +static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + struct nfc_llcp_local *local; + struct nfc_dev *dev; + struct sockaddr_nfc_llcp llcp_addr; + int len, ret = 0; + + pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family); + + if (!addr || addr->sa_family != AF_NFC) + return -EINVAL; + + memset(&llcp_addr, 0, sizeof(llcp_addr)); + len = min_t(unsigned int, sizeof(llcp_addr), alen); + memcpy(&llcp_addr, addr, len); + + /* This is going to be a listening socket, dsap must be 0 */ + if (llcp_addr.dsap != 0) + return -EINVAL; + + lock_sock(sk); + + if (sk->sk_state != LLCP_CLOSED) { + ret = -EBADFD; + goto error; + } + + dev = nfc_get_device(llcp_addr.dev_idx); + if (dev == NULL) { + ret = -ENODEV; + goto error; + } + + local = nfc_llcp_find_local(dev); + if (local == NULL) { + ret = -ENODEV; + goto put_dev; + } + + llcp_sock->dev = dev; + llcp_sock->local = local; + llcp_sock->nfc_protocol = llcp_addr.nfc_protocol; + llcp_sock->service_name_len = min_t(unsigned int, + llcp_addr.service_name_len, NFC_LLCP_MAX_SERVICE_NAME); + llcp_sock->service_name = kmemdup(llcp_addr.service_name, + llcp_sock->service_name_len, GFP_KERNEL); + + llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock); + if (llcp_sock->ssap == LLCP_MAX_SAP) + goto put_dev; + + local->sockets[llcp_sock->ssap] = llcp_sock; + + pr_debug("Socket bound to SAP %d\n", llcp_sock->ssap); + + sk->sk_state = LLCP_BOUND; + +put_dev: + nfc_put_device(dev); + +error: + release_sock(sk); + return ret; +} + +static int llcp_sock_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int ret = 0; + + pr_debug("sk %p backlog %d\n", sk, backlog); + + lock_sock(sk); + + if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) + || sk->sk_state != LLCP_BOUND) { + ret = -EBADFD; + goto error; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_ack_backlog = 0; + + pr_debug("Socket listening\n"); + sk->sk_state = LLCP_LISTEN; + +error: + release_sock(sk); + + return ret; +} + +void nfc_llcp_accept_unlink(struct sock *sk) +{ + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + + pr_debug("state %d\n", sk->sk_state); + + list_del_init(&llcp_sock->accept_queue); + sk_acceptq_removed(llcp_sock->parent); + llcp_sock->parent = NULL; + + sock_put(sk); +} + +void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk) +{ + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + struct nfc_llcp_sock *llcp_sock_parent = nfc_llcp_sock(parent); + + /* Lock will be free from unlink */ + sock_hold(sk); + + list_add_tail(&llcp_sock->accept_queue, + &llcp_sock_parent->accept_queue); + llcp_sock->parent = parent; + sk_acceptq_added(parent); +} + +struct sock *nfc_llcp_accept_dequeue(struct sock *parent, + struct socket *newsock) +{ + struct nfc_llcp_sock *lsk, *n, *llcp_parent; + struct sock *sk; + + llcp_parent = nfc_llcp_sock(parent); + + list_for_each_entry_safe(lsk, n, &llcp_parent->accept_queue, + accept_queue) { + sk = &lsk->sk; + lock_sock(sk); + + if (sk->sk_state == LLCP_CLOSED) { + release_sock(sk); + nfc_llcp_accept_unlink(sk); + continue; + } + + if (sk->sk_state == LLCP_CONNECTED || !newsock) { + nfc_llcp_accept_unlink(sk); + if (newsock) + sock_graft(sk, newsock); + + release_sock(sk); + + pr_debug("Returning sk state %d\n", sk->sk_state); + + return sk; + } + + release_sock(sk); + } + + return NULL; +} + +static int llcp_sock_accept(struct socket *sock, struct socket *newsock, + int flags) +{ + DECLARE_WAITQUEUE(wait, current); + struct sock *sk = sock->sk, *new_sk; + long timeo; + int ret = 0; + + pr_debug("parent %p\n", sk); + + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (sk->sk_state != LLCP_LISTEN) { + ret = -EBADFD; + goto error; + } + + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + /* Wait for an incoming connection. */ + add_wait_queue_exclusive(sk_sleep(sk), &wait); + while (!(new_sk = nfc_llcp_accept_dequeue(sk, newsock))) { + set_current_state(TASK_INTERRUPTIBLE); + + if (!timeo) { + ret = -EAGAIN; + break; + } + + if (signal_pending(current)) { + ret = sock_intr_errno(timeo); + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + } + __set_current_state(TASK_RUNNING); + remove_wait_queue(sk_sleep(sk), &wait); + + if (ret) + goto error; + + newsock->state = SS_CONNECTED; + + pr_debug("new socket %p\n", new_sk); + +error: + release_sock(sk); + + return ret; +} + +static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr, + int *len, int peer) +{ + struct sockaddr_nfc_llcp *llcp_addr = (struct sockaddr_nfc_llcp *) addr; + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + + pr_debug("%p\n", sk); + + addr->sa_family = AF_NFC; + *len = sizeof(struct sockaddr_nfc_llcp); + + llcp_addr->dev_idx = llcp_sock->dev->idx; + llcp_addr->dsap = llcp_sock->dsap; + llcp_addr->ssap = llcp_sock->ssap; + llcp_addr->service_name_len = llcp_sock->service_name_len; + memcpy(llcp_addr->service_name, llcp_sock->service_name, + llcp_addr->service_name_len); + + return 0; +} + +static inline unsigned int llcp_accept_poll(struct sock *parent) +{ + struct nfc_llcp_sock *llcp_sock, *n, *parent_sock; + struct sock *sk; + + parent_sock = nfc_llcp_sock(parent); + + list_for_each_entry_safe(llcp_sock, n, &parent_sock->accept_queue, + accept_queue) { + sk = &llcp_sock->sk; + + if (sk->sk_state == LLCP_CONNECTED) + return POLLIN | POLLRDNORM; + } + + return 0; +} + +static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + unsigned int mask = 0; + + pr_debug("%p\n", sk); + + sock_poll_wait(file, sk_sleep(sk), wait); + + if (sk->sk_state == LLCP_LISTEN) + return llcp_accept_poll(sk); + + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; + + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= POLLIN; + + if (sk->sk_state == LLCP_CLOSED) + mask |= POLLHUP; + + return mask; +} + +static int llcp_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_local *local; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + + if (!sk) + return 0; + + pr_debug("%p\n", sk); + + local = llcp_sock->local; + if (local == NULL) + return -ENODEV; + + mutex_lock(&local->socket_lock); + + if (llcp_sock == local->sockets[llcp_sock->ssap]) { + local->sockets[llcp_sock->ssap] = NULL; + } else { + struct nfc_llcp_sock *parent, *s, *n; + + parent = local->sockets[llcp_sock->ssap]; + + list_for_each_entry_safe(s, n, &parent->list, list) + if (llcp_sock == s) { + list_del(&s->list); + break; + } + + } + + mutex_unlock(&local->socket_lock); + + lock_sock(sk); + + /* Send a DISC */ + if (sk->sk_state == LLCP_CONNECTED) + nfc_llcp_disconnect(llcp_sock); + + if (sk->sk_state == LLCP_LISTEN) { + struct nfc_llcp_sock *lsk, *n; + struct sock *accept_sk; + + list_for_each_entry_safe(lsk, n, &llcp_sock->accept_queue, + accept_queue) { + accept_sk = &lsk->sk; + lock_sock(accept_sk); + + nfc_llcp_disconnect(lsk); + nfc_llcp_accept_unlink(accept_sk); + + release_sock(accept_sk); + + sock_set_flag(sk, SOCK_DEAD); + sock_orphan(accept_sk); + sock_put(accept_sk); + } + } + + /* Freeing the SAP */ + if ((sk->sk_state == LLCP_CONNECTED + && llcp_sock->ssap > LLCP_LOCAL_SAP_OFFSET) || + sk->sk_state == LLCP_BOUND || + sk->sk_state == LLCP_LISTEN) + nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap); + + sock_set_flag(sk, SOCK_DEAD); + + release_sock(sk); + + sock_orphan(sk); + sock_put(sk); + + return 0; +} + +static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, + int len, int flags) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + struct sockaddr_nfc_llcp *addr = (struct sockaddr_nfc_llcp *)_addr; + struct nfc_dev *dev; + struct nfc_llcp_local *local; + int ret = 0; + + pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags); + + if (!addr || len < sizeof(struct sockaddr_nfc) || + addr->sa_family != AF_NFC) { + pr_err("Invalid socket\n"); + return -EINVAL; + } + + if (addr->service_name_len == 0 && addr->dsap == 0) { + pr_err("Missing service name or dsap\n"); + return -EINVAL; + } + + pr_debug("addr dev_idx=%u target_idx=%u protocol=%u\n", addr->dev_idx, + addr->target_idx, addr->nfc_protocol); + + lock_sock(sk); + + if (sk->sk_state == LLCP_CONNECTED) { + ret = -EISCONN; + goto error; + } + + dev = nfc_get_device(addr->dev_idx); + if (dev == NULL) { + ret = -ENODEV; + goto error; + } + + local = nfc_llcp_find_local(dev); + if (local == NULL) { + ret = -ENODEV; + goto put_dev; + } + + device_lock(&dev->dev); + if (dev->dep_link_up == false) { + ret = -ENOLINK; + device_unlock(&dev->dev); + goto put_dev; + } + device_unlock(&dev->dev); + + if (local->rf_mode == NFC_RF_INITIATOR && + addr->target_idx != local->target_idx) { + ret = -ENOLINK; + goto put_dev; + } + + llcp_sock->dev = dev; + llcp_sock->local = local; + llcp_sock->ssap = nfc_llcp_get_local_ssap(local); + if (llcp_sock->ssap == LLCP_SAP_MAX) { + ret = -ENOMEM; + goto put_dev; + } + if (addr->service_name_len == 0) + llcp_sock->dsap = addr->dsap; + else + llcp_sock->dsap = LLCP_SAP_SDP; + llcp_sock->nfc_protocol = addr->nfc_protocol; + llcp_sock->service_name_len = min_t(unsigned int, + addr->service_name_len, NFC_LLCP_MAX_SERVICE_NAME); + llcp_sock->service_name = kmemdup(addr->service_name, + llcp_sock->service_name_len, GFP_KERNEL); + + local->sockets[llcp_sock->ssap] = llcp_sock; + + ret = nfc_llcp_send_connect(llcp_sock); + if (ret) + goto put_dev; + + sk->sk_state = LLCP_CONNECTED; + + release_sock(sk); + return 0; + +put_dev: + nfc_put_device(dev); + +error: + release_sock(sk); + return ret; +} + +static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + int noblock = flags & MSG_DONTWAIT; + struct sock *sk = sock->sk; + unsigned int copied, rlen; + struct sk_buff *skb, *cskb; + int err = 0; + + pr_debug("%p %zu\n", sk, len); + + lock_sock(sk); + + if (sk->sk_state == LLCP_CLOSED && + skb_queue_empty(&sk->sk_receive_queue)) { + release_sock(sk); + return 0; + } + + release_sock(sk); + + if (flags & (MSG_OOB)) + return -EOPNOTSUPP; + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) { + pr_err("Recv datagram failed state %d %d %d", + sk->sk_state, err, sock_error(sk)); + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 0; + + return err; + } + + rlen = skb->len; /* real length of skb */ + copied = min_t(unsigned int, rlen, len); + + cskb = skb; + if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { + if (!(flags & MSG_PEEK)) + skb_queue_head(&sk->sk_receive_queue, skb); + return -EFAULT; + } + + /* Mark read part of skb as used */ + if (!(flags & MSG_PEEK)) { + + /* SOCK_STREAM: re-queue skb if it contains unreceived data */ + if (sk->sk_type == SOCK_STREAM) { + skb_pull(skb, copied); + if (skb->len) { + skb_queue_head(&sk->sk_receive_queue, skb); + goto done; + } + } + + kfree_skb(skb); + } + + /* XXX Queue backlogged skbs */ + +done: + /* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */ + if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC)) + copied = rlen; + + return copied; +} + +static const struct proto_ops llcp_sock_ops = { + .family = PF_NFC, + .owner = THIS_MODULE, + .bind = llcp_sock_bind, + .connect = llcp_sock_connect, + .release = llcp_sock_release, + .socketpair = sock_no_socketpair, + .accept = llcp_sock_accept, + .getname = llcp_sock_getname, + .poll = llcp_sock_poll, + .ioctl = sock_no_ioctl, + .listen = llcp_sock_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = llcp_sock_recvmsg, + .mmap = sock_no_mmap, +}; + +static void llcp_sock_destruct(struct sock *sk) +{ + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + + pr_debug("%p\n", sk); + + if (sk->sk_state == LLCP_CONNECTED) + nfc_put_device(llcp_sock->dev); + + skb_queue_purge(&sk->sk_receive_queue); + + nfc_llcp_sock_free(llcp_sock); + + if (!sock_flag(sk, SOCK_DEAD)) { + pr_err("Freeing alive NFC LLCP socket %p\n", sk); + return; + } +} + +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) +{ + struct sock *sk; + struct nfc_llcp_sock *llcp_sock; + + sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto); + if (!sk) + return NULL; + + llcp_sock = nfc_llcp_sock(sk); + + sock_init_data(sock, sk); + sk->sk_state = LLCP_CLOSED; + sk->sk_protocol = NFC_SOCKPROTO_LLCP; + sk->sk_type = type; + sk->sk_destruct = llcp_sock_destruct; + + llcp_sock->ssap = 0; + llcp_sock->dsap = LLCP_SAP_SDP; + llcp_sock->send_n = llcp_sock->send_ack_n = 0; + llcp_sock->recv_n = llcp_sock->recv_ack_n = 0; + llcp_sock->remote_ready = 1; + skb_queue_head_init(&llcp_sock->tx_queue); + skb_queue_head_init(&llcp_sock->tx_pending_queue); + skb_queue_head_init(&llcp_sock->tx_backlog_queue); + INIT_LIST_HEAD(&llcp_sock->list); + INIT_LIST_HEAD(&llcp_sock->accept_queue); + + if (sock != NULL) + sock->state = SS_UNCONNECTED; + + return sk; +} + +void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) +{ + kfree(sock->service_name); + + skb_queue_purge(&sock->tx_queue); + skb_queue_purge(&sock->tx_pending_queue); + skb_queue_purge(&sock->tx_backlog_queue); + + list_del_init(&sock->accept_queue); + + sock->parent = NULL; +} + +static int llcp_sock_create(struct net *net, struct socket *sock, + const struct nfc_protocol *nfc_proto) +{ + struct sock *sk; + + pr_debug("%p\n", sock); + + if (sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM) + return -ESOCKTNOSUPPORT; + + sock->ops = &llcp_sock_ops; + + sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC); + if (sk == NULL) + return -ENOMEM; + + return 0; +} + +static const struct nfc_protocol llcp_nfc_proto = { + .id = NFC_SOCKPROTO_LLCP, + .proto = &llcp_sock_proto, + .owner = THIS_MODULE, + .create = llcp_sock_create +}; + +int __init nfc_llcp_sock_init(void) +{ + return nfc_proto_register(&llcp_nfc_proto); +} + +void nfc_llcp_sock_exit(void) +{ + nfc_proto_unregister(&llcp_nfc_proto); +} diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig new file mode 100644 index 000000000000..decdc49b26d8 --- /dev/null +++ b/net/nfc/nci/Kconfig @@ -0,0 +1,10 @@ +config NFC_NCI + depends on NFC && EXPERIMENTAL + tristate "NCI protocol support (EXPERIMENTAL)" + default n + help + NCI (NFC Controller Interface) is a communication protocol between + an NFC Controller (NFCC) and a Device Host (DH). + + Say Y here to compile NCI support into the kernel or say M to + compile it as module (nci). diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile new file mode 100644 index 000000000000..cdb3a2e44471 --- /dev/null +++ b/net/nfc/nci/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Linux NFC NCI layer. +# + +obj-$(CONFIG_NFC_NCI) += nci.o + +nci-objs := core.o data.o lib.o ntf.o rsp.o
\ No newline at end of file diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c new file mode 100644 index 000000000000..7650139a1a05 --- /dev/null +++ b/net/nfc/nci/core.c @@ -0,0 +1,786 @@ +/* + * The NFC Controller Interface is the communication protocol between an + * NFC Controller (NFCC) and a Device Host (DH). + * + * Copyright (C) 2011 Texas Instruments, Inc. + * + * Written by Ilan Elias <ilane@ti.com> + * + * Acknowledgements: + * This file is based on hci_core.c, which was written + * by Maxim Krasnyansky. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ + +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/completion.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/bitops.h> +#include <linux/skbuff.h> + +#include "../nfc.h" +#include <net/nfc/nci.h> +#include <net/nfc/nci_core.h> +#include <linux/nfc.h> + +static void nci_cmd_work(struct work_struct *work); +static void nci_rx_work(struct work_struct *work); +static void nci_tx_work(struct work_struct *work); + +/* ---- NCI requests ---- */ + +void nci_req_complete(struct nci_dev *ndev, int result) +{ + if (ndev->req_status == NCI_REQ_PEND) { + ndev->req_result = result; + ndev->req_status = NCI_REQ_DONE; + complete(&ndev->req_completion); + } +} + +static void nci_req_cancel(struct nci_dev *ndev, int err) +{ + if (ndev->req_status == NCI_REQ_PEND) { + ndev->req_result = err; + ndev->req_status = NCI_REQ_CANCELED; + complete(&ndev->req_completion); + } +} + +/* Execute request and wait for completion. */ +static int __nci_request(struct nci_dev *ndev, + void (*req)(struct nci_dev *ndev, unsigned long opt), + unsigned long opt, + __u32 timeout) +{ + int rc = 0; + long completion_rc; + + ndev->req_status = NCI_REQ_PEND; + + init_completion(&ndev->req_completion); + req(ndev, opt); + completion_rc = wait_for_completion_interruptible_timeout( + &ndev->req_completion, + timeout); + + pr_debug("wait_for_completion return %ld\n", completion_rc); + + if (completion_rc > 0) { + switch (ndev->req_status) { + case NCI_REQ_DONE: + rc = nci_to_errno(ndev->req_result); + break; + + case NCI_REQ_CANCELED: + rc = -ndev->req_result; + break; + + default: + rc = -ETIMEDOUT; + break; + } + } else { + pr_err("wait_for_completion_interruptible_timeout failed %ld\n", + completion_rc); + + rc = ((completion_rc == 0) ? (-ETIMEDOUT) : (completion_rc)); + } + + ndev->req_status = ndev->req_result = 0; + + return rc; +} + +static inline int nci_request(struct nci_dev *ndev, + void (*req)(struct nci_dev *ndev, unsigned long opt), + unsigned long opt, __u32 timeout) +{ + int rc; + + if (!test_bit(NCI_UP, &ndev->flags)) + return -ENETDOWN; + + /* Serialize all requests */ + mutex_lock(&ndev->req_lock); + rc = __nci_request(ndev, req, opt, timeout); + mutex_unlock(&ndev->req_lock); + + return rc; +} + +static void nci_reset_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_core_reset_cmd cmd; + + cmd.reset_type = NCI_RESET_TYPE_RESET_CONFIG; + nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd); +} + +static void nci_init_req(struct nci_dev *ndev, unsigned long opt) +{ + nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, 0, NULL); +} + +static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_rf_disc_map_cmd cmd; + struct disc_map_config *cfg = cmd.mapping_configs; + __u8 *num = &cmd.num_mapping_configs; + int i; + + /* set rf mapping configurations */ + *num = 0; + + /* by default mapping is set to NCI_RF_INTERFACE_FRAME */ + for (i = 0; i < ndev->num_supported_rf_interfaces; i++) { + if (ndev->supported_rf_interfaces[i] == + NCI_RF_INTERFACE_ISO_DEP) { + cfg[*num].rf_protocol = NCI_RF_PROTOCOL_ISO_DEP; + cfg[*num].mode = NCI_DISC_MAP_MODE_POLL | + NCI_DISC_MAP_MODE_LISTEN; + cfg[*num].rf_interface = NCI_RF_INTERFACE_ISO_DEP; + (*num)++; + } else if (ndev->supported_rf_interfaces[i] == + NCI_RF_INTERFACE_NFC_DEP) { + cfg[*num].rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; + cfg[*num].mode = NCI_DISC_MAP_MODE_POLL | + NCI_DISC_MAP_MODE_LISTEN; + cfg[*num].rf_interface = NCI_RF_INTERFACE_NFC_DEP; + (*num)++; + } + + if (*num == NCI_MAX_NUM_MAPPING_CONFIGS) + break; + } + + nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_MAP_CMD, + (1 + ((*num)*sizeof(struct disc_map_config))), + &cmd); +} + +static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_rf_disc_cmd cmd; + __u32 protocols = opt; + + cmd.num_disc_configs = 0; + + if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && + (protocols & NFC_PROTO_JEWEL_MASK + || protocols & NFC_PROTO_MIFARE_MASK + || protocols & NFC_PROTO_ISO14443_MASK + || protocols & NFC_PROTO_NFC_DEP_MASK)) { + cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = + NCI_NFC_A_PASSIVE_POLL_MODE; + cmd.disc_configs[cmd.num_disc_configs].frequency = 1; + cmd.num_disc_configs++; + } + + if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && + (protocols & NFC_PROTO_ISO14443_MASK)) { + cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = + NCI_NFC_B_PASSIVE_POLL_MODE; + cmd.disc_configs[cmd.num_disc_configs].frequency = 1; + cmd.num_disc_configs++; + } + + if ((cmd.num_disc_configs < NCI_MAX_NUM_RF_CONFIGS) && + (protocols & NFC_PROTO_FELICA_MASK + || protocols & NFC_PROTO_NFC_DEP_MASK)) { + cmd.disc_configs[cmd.num_disc_configs].rf_tech_and_mode = + NCI_NFC_F_PASSIVE_POLL_MODE; + cmd.disc_configs[cmd.num_disc_configs].frequency = 1; + cmd.num_disc_configs++; + } + + nci_send_cmd(ndev, NCI_OP_RF_DISCOVER_CMD, + (1 + (cmd.num_disc_configs*sizeof(struct disc_config))), + &cmd); +} + +static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_rf_deactivate_cmd cmd; + + cmd.type = NCI_DEACTIVATE_TYPE_IDLE_MODE; + + nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD, + sizeof(struct nci_rf_deactivate_cmd), + &cmd); +} + +static int nci_open_device(struct nci_dev *ndev) +{ + int rc = 0; + + mutex_lock(&ndev->req_lock); + + if (test_bit(NCI_UP, &ndev->flags)) { + rc = -EALREADY; + goto done; + } + + if (ndev->ops->open(ndev)) { + rc = -EIO; + goto done; + } + + atomic_set(&ndev->cmd_cnt, 1); + + set_bit(NCI_INIT, &ndev->flags); + + rc = __nci_request(ndev, nci_reset_req, 0, + msecs_to_jiffies(NCI_RESET_TIMEOUT)); + + if (!rc) { + rc = __nci_request(ndev, nci_init_req, 0, + msecs_to_jiffies(NCI_INIT_TIMEOUT)); + } + + if (!rc) { + rc = __nci_request(ndev, nci_init_complete_req, 0, + msecs_to_jiffies(NCI_INIT_TIMEOUT)); + } + + clear_bit(NCI_INIT, &ndev->flags); + + if (!rc) { + set_bit(NCI_UP, &ndev->flags); + } else { + /* Init failed, cleanup */ + skb_queue_purge(&ndev->cmd_q); + skb_queue_purge(&ndev->rx_q); + skb_queue_purge(&ndev->tx_q); + + ndev->ops->close(ndev); + ndev->flags = 0; + } + +done: + mutex_unlock(&ndev->req_lock); + return rc; +} + +static int nci_close_device(struct nci_dev *ndev) +{ + nci_req_cancel(ndev, ENODEV); + mutex_lock(&ndev->req_lock); + + if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { + del_timer_sync(&ndev->cmd_timer); + mutex_unlock(&ndev->req_lock); + return 0; + } + + /* Drop RX and TX queues */ + skb_queue_purge(&ndev->rx_q); + skb_queue_purge(&ndev->tx_q); + + /* Flush RX and TX wq */ + flush_workqueue(ndev->rx_wq); + flush_workqueue(ndev->tx_wq); + + /* Reset device */ + skb_queue_purge(&ndev->cmd_q); + atomic_set(&ndev->cmd_cnt, 1); + + set_bit(NCI_INIT, &ndev->flags); + __nci_request(ndev, nci_reset_req, 0, + msecs_to_jiffies(NCI_RESET_TIMEOUT)); + clear_bit(NCI_INIT, &ndev->flags); + + /* Flush cmd wq */ + flush_workqueue(ndev->cmd_wq); + + /* After this point our queues are empty + * and no works are scheduled. */ + ndev->ops->close(ndev); + + /* Clear flags */ + ndev->flags = 0; + + mutex_unlock(&ndev->req_lock); + + return 0; +} + +/* NCI command timer function */ +static void nci_cmd_timer(unsigned long arg) +{ + struct nci_dev *ndev = (void *) arg; + + atomic_set(&ndev->cmd_cnt, 1); + queue_work(ndev->cmd_wq, &ndev->cmd_work); +} + +static int nci_dev_up(struct nfc_dev *nfc_dev) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + + return nci_open_device(ndev); +} + +static int nci_dev_down(struct nfc_dev *nfc_dev) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + + return nci_close_device(ndev); +} + +static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 protocols) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + int rc; + + if (test_bit(NCI_DISCOVERY, &ndev->flags)) { + pr_err("unable to start poll, since poll is already active\n"); + return -EBUSY; + } + + if (ndev->target_active_prot) { + pr_err("there is an active target\n"); + return -EBUSY; + } + + if (test_bit(NCI_POLL_ACTIVE, &ndev->flags)) { + pr_debug("target is active, implicitly deactivate...\n"); + + rc = nci_request(ndev, nci_rf_deactivate_req, 0, + msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); + if (rc) + return -EBUSY; + } + + rc = nci_request(ndev, nci_rf_discover_req, protocols, + msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); + + if (!rc) + ndev->poll_prots = protocols; + + return rc; +} + +static void nci_stop_poll(struct nfc_dev *nfc_dev) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + + if (!test_bit(NCI_DISCOVERY, &ndev->flags)) { + pr_err("unable to stop poll, since poll is not active\n"); + return; + } + + nci_request(ndev, nci_rf_deactivate_req, 0, + msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); +} + +static int nci_activate_target(struct nfc_dev *nfc_dev, __u32 target_idx, + __u32 protocol) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + + pr_debug("target_idx %d, protocol 0x%x\n", target_idx, protocol); + + if (!test_bit(NCI_POLL_ACTIVE, &ndev->flags)) { + pr_err("there is no available target to activate\n"); + return -EINVAL; + } + + if (ndev->target_active_prot) { + pr_err("there is already an active target\n"); + return -EBUSY; + } + + if (!(ndev->target_available_prots & (1 << protocol))) { + pr_err("target does not support the requested protocol 0x%x\n", + protocol); + return -EINVAL; + } + + ndev->target_active_prot = protocol; + ndev->target_available_prots = 0; + + return 0; +} + +static void nci_deactivate_target(struct nfc_dev *nfc_dev, __u32 target_idx) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + + pr_debug("target_idx %d\n", target_idx); + + if (!ndev->target_active_prot) { + pr_err("unable to deactivate target, no active target\n"); + return; + } + + ndev->target_active_prot = 0; + + if (test_bit(NCI_POLL_ACTIVE, &ndev->flags)) { + nci_request(ndev, nci_rf_deactivate_req, 0, + msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); + } +} + +static int nci_data_exchange(struct nfc_dev *nfc_dev, __u32 target_idx, + struct sk_buff *skb, + data_exchange_cb_t cb, + void *cb_context) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + int rc; + + pr_debug("target_idx %d, len %d\n", target_idx, skb->len); + + if (!ndev->target_active_prot) { + pr_err("unable to exchange data, no active target\n"); + return -EINVAL; + } + + if (test_and_set_bit(NCI_DATA_EXCHANGE, &ndev->flags)) + return -EBUSY; + + /* store cb and context to be used on receiving data */ + ndev->data_exchange_cb = cb; + ndev->data_exchange_cb_context = cb_context; + + rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); + if (rc) + clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); + + return rc; +} + +static struct nfc_ops nci_nfc_ops = { + .dev_up = nci_dev_up, + .dev_down = nci_dev_down, + .start_poll = nci_start_poll, + .stop_poll = nci_stop_poll, + .activate_target = nci_activate_target, + .deactivate_target = nci_deactivate_target, + .data_exchange = nci_data_exchange, +}; + +/* ---- Interface to NCI drivers ---- */ + +/** + * nci_allocate_device - allocate a new nci device + * + * @ops: device operations + * @supported_protocols: NFC protocols supported by the device + */ +struct nci_dev *nci_allocate_device(struct nci_ops *ops, + __u32 supported_protocols, + int tx_headroom, + int tx_tailroom) +{ + struct nci_dev *ndev; + + pr_debug("supported_protocols 0x%x\n", supported_protocols); + + if (!ops->open || !ops->close || !ops->send) + return NULL; + + if (!supported_protocols) + return NULL; + + ndev = kzalloc(sizeof(struct nci_dev), GFP_KERNEL); + if (!ndev) + return NULL; + + ndev->ops = ops; + ndev->tx_headroom = tx_headroom; + ndev->tx_tailroom = tx_tailroom; + + ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, + supported_protocols, + tx_headroom + NCI_DATA_HDR_SIZE, + tx_tailroom); + if (!ndev->nfc_dev) + goto free_exit; + + nfc_set_drvdata(ndev->nfc_dev, ndev); + + return ndev; + +free_exit: + kfree(ndev); + return NULL; +} +EXPORT_SYMBOL(nci_allocate_device); + +/** + * nci_free_device - deallocate nci device + * + * @ndev: The nci device to deallocate + */ +void nci_free_device(struct nci_dev *ndev) +{ + nfc_free_device(ndev->nfc_dev); + kfree(ndev); +} +EXPORT_SYMBOL(nci_free_device); + +/** + * nci_register_device - register a nci device in the nfc subsystem + * + * @dev: The nci device to register + */ +int nci_register_device(struct nci_dev *ndev) +{ + int rc; + struct device *dev = &ndev->nfc_dev->dev; + char name[32]; + + rc = nfc_register_device(ndev->nfc_dev); + if (rc) + goto exit; + + ndev->flags = 0; + + INIT_WORK(&ndev->cmd_work, nci_cmd_work); + snprintf(name, sizeof(name), "%s_nci_cmd_wq", dev_name(dev)); + ndev->cmd_wq = create_singlethread_workqueue(name); + if (!ndev->cmd_wq) { + rc = -ENOMEM; + goto unreg_exit; + } + + INIT_WORK(&ndev->rx_work, nci_rx_work); + snprintf(name, sizeof(name), "%s_nci_rx_wq", dev_name(dev)); + ndev->rx_wq = create_singlethread_workqueue(name); + if (!ndev->rx_wq) { + rc = -ENOMEM; + goto destroy_cmd_wq_exit; + } + + INIT_WORK(&ndev->tx_work, nci_tx_work); + snprintf(name, sizeof(name), "%s_nci_tx_wq", dev_name(dev)); + ndev->tx_wq = create_singlethread_workqueue(name); + if (!ndev->tx_wq) { + rc = -ENOMEM; + goto destroy_rx_wq_exit; + } + + skb_queue_head_init(&ndev->cmd_q); + skb_queue_head_init(&ndev->rx_q); + skb_queue_head_init(&ndev->tx_q); + + setup_timer(&ndev->cmd_timer, nci_cmd_timer, + (unsigned long) ndev); + + mutex_init(&ndev->req_lock); + + goto exit; + +destroy_rx_wq_exit: + destroy_workqueue(ndev->rx_wq); + +destroy_cmd_wq_exit: + destroy_workqueue(ndev->cmd_wq); + +unreg_exit: + nfc_unregister_device(ndev->nfc_dev); + +exit: + return rc; +} +EXPORT_SYMBOL(nci_register_device); + +/** + * nci_unregister_device - unregister a nci device in the nfc subsystem + * + * @dev: The nci device to unregister + */ +void nci_unregister_device(struct nci_dev *ndev) +{ + nci_close_device(ndev); + + destroy_workqueue(ndev->cmd_wq); + destroy_workqueue(ndev->rx_wq); + destroy_workqueue(ndev->tx_wq); + + nfc_unregister_device(ndev->nfc_dev); +} +EXPORT_SYMBOL(nci_unregister_device); + +/** + * nci_recv_frame - receive frame from NCI drivers + * + * @skb: The sk_buff to receive + */ +int nci_recv_frame(struct sk_buff *skb) +{ + struct nci_dev *ndev = (struct nci_dev *) skb->dev; + + pr_debug("len %d\n", skb->len); + + if (!ndev || (!test_bit(NCI_UP, &ndev->flags) + && !test_bit(NCI_INIT, &ndev->flags))) { + kfree_skb(skb); + return -ENXIO; + } + + /* Queue frame for rx worker thread */ + skb_queue_tail(&ndev->rx_q, skb); + queue_work(ndev->rx_wq, &ndev->rx_work); + + return 0; +} +EXPORT_SYMBOL(nci_recv_frame); + +static int nci_send_frame(struct sk_buff *skb) +{ + struct nci_dev *ndev = (struct nci_dev *) skb->dev; + + pr_debug("len %d\n", skb->len); + + if (!ndev) { + kfree_skb(skb); + return -ENODEV; + } + + /* Get rid of skb owner, prior to sending to the driver. */ + skb_orphan(skb); + + return ndev->ops->send(skb); +} + +/* Send NCI command */ +int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) +{ + struct nci_ctrl_hdr *hdr; + struct sk_buff *skb; + + pr_debug("opcode 0x%x, plen %d\n", opcode, plen); + + skb = nci_skb_alloc(ndev, (NCI_CTRL_HDR_SIZE + plen), GFP_KERNEL); + if (!skb) { + pr_err("no memory for command\n"); + return -ENOMEM; + } + + hdr = (struct nci_ctrl_hdr *) skb_put(skb, NCI_CTRL_HDR_SIZE); + hdr->gid = nci_opcode_gid(opcode); + hdr->oid = nci_opcode_oid(opcode); + hdr->plen = plen; + + nci_mt_set((__u8 *)hdr, NCI_MT_CMD_PKT); + nci_pbf_set((__u8 *)hdr, NCI_PBF_LAST); + + if (plen) + memcpy(skb_put(skb, plen), payload, plen); + + skb->dev = (void *) ndev; + + skb_queue_tail(&ndev->cmd_q, skb); + queue_work(ndev->cmd_wq, &ndev->cmd_work); + + return 0; +} + +/* ---- NCI TX Data worker thread ---- */ + +static void nci_tx_work(struct work_struct *work) +{ + struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work); + struct sk_buff *skb; + + pr_debug("credits_cnt %d\n", atomic_read(&ndev->credits_cnt)); + + /* Send queued tx data */ + while (atomic_read(&ndev->credits_cnt)) { + skb = skb_dequeue(&ndev->tx_q); + if (!skb) + return; + + /* Check if data flow control is used */ + if (atomic_read(&ndev->credits_cnt) != + NCI_DATA_FLOW_CONTROL_NOT_USED) + atomic_dec(&ndev->credits_cnt); + + pr_debug("NCI TX: MT=data, PBF=%d, conn_id=%d, plen=%d\n", + nci_pbf(skb->data), + nci_conn_id(skb->data), + nci_plen(skb->data)); + + nci_send_frame(skb); + } +} + +/* ----- NCI RX worker thread (data & control) ----- */ + +static void nci_rx_work(struct work_struct *work) +{ + struct nci_dev *ndev = container_of(work, struct nci_dev, rx_work); + struct sk_buff *skb; + + while ((skb = skb_dequeue(&ndev->rx_q))) { + /* Process frame */ + switch (nci_mt(skb->data)) { + case NCI_MT_RSP_PKT: + nci_rsp_packet(ndev, skb); + break; + + case NCI_MT_NTF_PKT: + nci_ntf_packet(ndev, skb); + break; + + case NCI_MT_DATA_PKT: + nci_rx_data_packet(ndev, skb); + break; + + default: + pr_err("unknown MT 0x%x\n", nci_mt(skb->data)); + kfree_skb(skb); + break; + } + } +} + +/* ----- NCI TX CMD worker thread ----- */ + +static void nci_cmd_work(struct work_struct *work) +{ + struct nci_dev *ndev = container_of(work, struct nci_dev, cmd_work); + struct sk_buff *skb; + + pr_debug("cmd_cnt %d\n", atomic_read(&ndev->cmd_cnt)); + + /* Send queued command */ + if (atomic_read(&ndev->cmd_cnt)) { + skb = skb_dequeue(&ndev->cmd_q); + if (!skb) + return; + + atomic_dec(&ndev->cmd_cnt); + + pr_debug("NCI TX: MT=cmd, PBF=%d, GID=0x%x, OID=0x%x, plen=%d\n", + nci_pbf(skb->data), + nci_opcode_gid(nci_opcode(skb->data)), + nci_opcode_oid(nci_opcode(skb->data)), + nci_plen(skb->data)); + + nci_send_frame(skb); + + mod_timer(&ndev->cmd_timer, + jiffies + msecs_to_jiffies(NCI_CMD_TIMEOUT)); + } +} diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c new file mode 100644 index 000000000000..e5756b30e602 --- /dev/null +++ b/net/nfc/nci/data.c @@ -0,0 +1,250 @@ +/* + * The NFC Controller Interface is the communication protocol between an + * NFC Controller (NFCC) and a Device Host (DH). + * + * Copyright (C) 2011 Texas Instruments, Inc. + * + * Written by Ilan Elias <ilane@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/wait.h> +#include <linux/bitops.h> +#include <linux/skbuff.h> + +#include "../nfc.h" +#include <net/nfc/nci.h> +#include <net/nfc/nci_core.h> +#include <linux/nfc.h> + +/* Complete data exchange transaction and forward skb to nfc core */ +void nci_data_exchange_complete(struct nci_dev *ndev, + struct sk_buff *skb, + int err) +{ + data_exchange_cb_t cb = ndev->data_exchange_cb; + void *cb_context = ndev->data_exchange_cb_context; + + pr_debug("len %d, err %d\n", skb ? skb->len : 0, err); + + if (cb) { + ndev->data_exchange_cb = NULL; + ndev->data_exchange_cb_context = 0; + + /* forward skb to nfc core */ + cb(cb_context, skb, err); + } else if (skb) { + pr_err("no rx callback, dropping rx data...\n"); + + /* no waiting callback, free skb */ + kfree_skb(skb); + } + + clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); +} + +/* ----------------- NCI TX Data ----------------- */ + +static inline void nci_push_data_hdr(struct nci_dev *ndev, + __u8 conn_id, + struct sk_buff *skb, + __u8 pbf) +{ + struct nci_data_hdr *hdr; + int plen = skb->len; + + hdr = (struct nci_data_hdr *) skb_push(skb, NCI_DATA_HDR_SIZE); + hdr->conn_id = conn_id; + hdr->rfu = 0; + hdr->plen = plen; + + nci_mt_set((__u8 *)hdr, NCI_MT_DATA_PKT); + nci_pbf_set((__u8 *)hdr, pbf); + + skb->dev = (void *) ndev; +} + +static int nci_queue_tx_data_frags(struct nci_dev *ndev, + __u8 conn_id, + struct sk_buff *skb) { + int total_len = skb->len; + unsigned char *data = skb->data; + unsigned long flags; + struct sk_buff_head frags_q; + struct sk_buff *skb_frag; + int frag_len; + int rc = 0; + + pr_debug("conn_id 0x%x, total_len %d\n", conn_id, total_len); + + __skb_queue_head_init(&frags_q); + + while (total_len) { + frag_len = + min_t(int, total_len, ndev->max_data_pkt_payload_size); + + skb_frag = nci_skb_alloc(ndev, + (NCI_DATA_HDR_SIZE + frag_len), + GFP_KERNEL); + if (skb_frag == NULL) { + rc = -ENOMEM; + goto free_exit; + } + skb_reserve(skb_frag, NCI_DATA_HDR_SIZE); + + /* first, copy the data */ + memcpy(skb_put(skb_frag, frag_len), data, frag_len); + + /* second, set the header */ + nci_push_data_hdr(ndev, conn_id, skb_frag, + ((total_len == frag_len) ? (NCI_PBF_LAST) : (NCI_PBF_CONT))); + + __skb_queue_tail(&frags_q, skb_frag); + + data += frag_len; + total_len -= frag_len; + + pr_debug("frag_len %d, remaining total_len %d\n", + frag_len, total_len); + } + + /* queue all fragments atomically */ + spin_lock_irqsave(&ndev->tx_q.lock, flags); + + while ((skb_frag = __skb_dequeue(&frags_q)) != NULL) + __skb_queue_tail(&ndev->tx_q, skb_frag); + + spin_unlock_irqrestore(&ndev->tx_q.lock, flags); + + /* free the original skb */ + kfree_skb(skb); + + goto exit; + +free_exit: + while ((skb_frag = __skb_dequeue(&frags_q)) != NULL) + kfree_skb(skb_frag); + +exit: + return rc; +} + +/* Send NCI data */ +int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) +{ + int rc = 0; + + pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len); + + /* check if the packet need to be fragmented */ + if (skb->len <= ndev->max_data_pkt_payload_size) { + /* no need to fragment packet */ + nci_push_data_hdr(ndev, conn_id, skb, NCI_PBF_LAST); + + skb_queue_tail(&ndev->tx_q, skb); + } else { + /* fragment packet and queue the fragments */ + rc = nci_queue_tx_data_frags(ndev, conn_id, skb); + if (rc) { + pr_err("failed to fragment tx data packet\n"); + goto free_exit; + } + } + + queue_work(ndev->tx_wq, &ndev->tx_work); + + goto exit; + +free_exit: + kfree_skb(skb); + +exit: + return rc; +} + +/* ----------------- NCI RX Data ----------------- */ + +static void nci_add_rx_data_frag(struct nci_dev *ndev, + struct sk_buff *skb, + __u8 pbf) +{ + int reassembly_len; + int err = 0; + + if (ndev->rx_data_reassembly) { + reassembly_len = ndev->rx_data_reassembly->len; + + /* first, make enough room for the already accumulated data */ + if (skb_cow_head(skb, reassembly_len)) { + pr_err("error adding room for accumulated rx data\n"); + + kfree_skb(skb); + skb = 0; + + kfree_skb(ndev->rx_data_reassembly); + ndev->rx_data_reassembly = 0; + + err = -ENOMEM; + goto exit; + } + + /* second, combine the two fragments */ + memcpy(skb_push(skb, reassembly_len), + ndev->rx_data_reassembly->data, + reassembly_len); + + /* third, free old reassembly */ + kfree_skb(ndev->rx_data_reassembly); + ndev->rx_data_reassembly = 0; + } + + if (pbf == NCI_PBF_CONT) { + /* need to wait for next fragment, store skb and exit */ + ndev->rx_data_reassembly = skb; + return; + } + +exit: + nci_data_exchange_complete(ndev, skb, err); +} + +/* Rx Data packet */ +void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) +{ + __u8 pbf = nci_pbf(skb->data); + + pr_debug("len %d\n", skb->len); + + pr_debug("NCI RX: MT=data, PBF=%d, conn_id=%d, plen=%d\n", + nci_pbf(skb->data), + nci_conn_id(skb->data), + nci_plen(skb->data)); + + /* strip the nci data header */ + skb_pull(skb, NCI_DATA_HDR_SIZE); + + if (ndev->target_active_prot == NFC_PROTO_MIFARE) { + /* frame I/F => remove the status byte */ + pr_debug("NFC_PROTO_MIFARE => remove the status byte\n"); + skb_trim(skb, (skb->len - 1)); + } + + nci_add_rx_data_frag(ndev, skb, pbf); +} diff --git a/net/nfc/nci/lib.c b/net/nfc/nci/lib.c new file mode 100644 index 000000000000..6a63e5eb483d --- /dev/null +++ b/net/nfc/nci/lib.c @@ -0,0 +1,85 @@ +/* + * The NFC Controller Interface is the communication protocol between an + * NFC Controller (NFCC) and a Device Host (DH). + * + * Copyright (C) 2011 Texas Instruments, Inc. + * + * Written by Ilan Elias <ilane@ti.com> + * + * Acknowledgements: + * This file is based on lib.c, which was written + * by Maxim Krasnyansky. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/errno.h> + +#include <net/nfc/nci.h> + +/* NCI status codes to Unix errno mapping */ +int nci_to_errno(__u8 code) +{ + switch (code) { + case NCI_STATUS_OK: + return 0; + + case NCI_STATUS_REJECTED: + return -EBUSY; + + case NCI_STATUS_RF_FRAME_CORRUPTED: + return -EBADMSG; + + case NCI_STATUS_NOT_INITIALIZED: + return -EHOSTDOWN; + + case NCI_STATUS_SYNTAX_ERROR: + case NCI_STATUS_SEMANTIC_ERROR: + case NCI_STATUS_INVALID_PARAM: + case NCI_STATUS_RF_PROTOCOL_ERROR: + case NCI_STATUS_NFCEE_PROTOCOL_ERROR: + return -EPROTO; + + case NCI_STATUS_UNKNOWN_GID: + case NCI_STATUS_UNKNOWN_OID: + return -EBADRQC; + + case NCI_STATUS_MESSAGE_SIZE_EXCEEDED: + return -EMSGSIZE; + + case NCI_STATUS_DISCOVERY_ALREADY_STARTED: + return -EALREADY; + + case NCI_STATUS_DISCOVERY_TARGET_ACTIVATION_FAILED: + case NCI_STATUS_NFCEE_INTERFACE_ACTIVATION_FAILED: + return -ECONNREFUSED; + + case NCI_STATUS_RF_TRANSMISSION_ERROR: + case NCI_STATUS_NFCEE_TRANSMISSION_ERROR: + return -ECOMM; + + case NCI_STATUS_RF_TIMEOUT_ERROR: + case NCI_STATUS_NFCEE_TIMEOUT_ERROR: + return -ETIMEDOUT; + + case NCI_STATUS_FAILED: + default: + return -ENOSYS; + } +} +EXPORT_SYMBOL(nci_to_errno); diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c new file mode 100644 index 000000000000..b16a8dc2afbe --- /dev/null +++ b/net/nfc/nci/ntf.c @@ -0,0 +1,321 @@ +/* + * The NFC Controller Interface is the communication protocol between an + * NFC Controller (NFCC) and a Device Host (DH). + * + * Copyright (C) 2011 Texas Instruments, Inc. + * + * Written by Ilan Elias <ilane@ti.com> + * + * Acknowledgements: + * This file is based on hci_event.c, which was written + * by Maxim Krasnyansky. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/bitops.h> +#include <linux/skbuff.h> + +#include "../nfc.h" +#include <net/nfc/nci.h> +#include <net/nfc/nci_core.h> +#include <linux/nfc.h> + +/* Handle NCI Notification packets */ + +static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_core_conn_credit_ntf *ntf = (void *) skb->data; + int i; + + pr_debug("num_entries %d\n", ntf->num_entries); + + if (ntf->num_entries > NCI_MAX_NUM_CONN) + ntf->num_entries = NCI_MAX_NUM_CONN; + + /* update the credits */ + for (i = 0; i < ntf->num_entries; i++) { + ntf->conn_entries[i].conn_id = + nci_conn_id(&ntf->conn_entries[i].conn_id); + + pr_debug("entry[%d]: conn_id %d, credits %d\n", + i, ntf->conn_entries[i].conn_id, + ntf->conn_entries[i].credits); + + if (ntf->conn_entries[i].conn_id == NCI_STATIC_RF_CONN_ID) { + /* found static rf connection */ + atomic_add(ntf->conn_entries[i].credits, + &ndev->credits_cnt); + } + } + + /* trigger the next tx */ + if (!skb_queue_empty(&ndev->tx_q)) + queue_work(ndev->tx_wq, &ndev->tx_work); +} + +static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_core_intf_error_ntf *ntf = (void *) skb->data; + + ntf->conn_id = nci_conn_id(&ntf->conn_id); + + pr_debug("status 0x%x, conn_id %d\n", ntf->status, ntf->conn_id); + + /* complete the data exchange transaction, if exists */ + if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) + nci_data_exchange_complete(ndev, NULL, -EIO); +} + +static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, + struct nci_rf_intf_activated_ntf *ntf, __u8 *data) +{ + struct rf_tech_specific_params_nfca_poll *nfca_poll; + + nfca_poll = &ntf->rf_tech_specific_params.nfca_poll; + + nfca_poll->sens_res = __le16_to_cpu(*((__u16 *)data)); + data += 2; + + nfca_poll->nfcid1_len = *data++; + + pr_debug("sens_res 0x%x, nfcid1_len %d\n", + nfca_poll->sens_res, nfca_poll->nfcid1_len); + + memcpy(nfca_poll->nfcid1, data, nfca_poll->nfcid1_len); + data += nfca_poll->nfcid1_len; + + nfca_poll->sel_res_len = *data++; + + if (nfca_poll->sel_res_len != 0) + nfca_poll->sel_res = *data++; + + pr_debug("sel_res_len %d, sel_res 0x%x\n", + nfca_poll->sel_res_len, + nfca_poll->sel_res); + + return data; +} + +static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev, + struct nci_rf_intf_activated_ntf *ntf, __u8 *data) +{ + struct activation_params_nfca_poll_iso_dep *nfca_poll; + + switch (ntf->activation_rf_tech_and_mode) { + case NCI_NFC_A_PASSIVE_POLL_MODE: + nfca_poll = &ntf->activation_params.nfca_poll_iso_dep; + nfca_poll->rats_res_len = *data++; + if (nfca_poll->rats_res_len > 0) { + memcpy(nfca_poll->rats_res, + data, + nfca_poll->rats_res_len); + } + break; + + default: + pr_err("unsupported activation_rf_tech_and_mode 0x%x\n", + ntf->activation_rf_tech_and_mode); + return -EPROTO; + } + + return 0; +} + +static void nci_target_found(struct nci_dev *ndev, + struct nci_rf_intf_activated_ntf *ntf) +{ + struct nfc_target nfc_tgt; + + if (ntf->rf_protocol == NCI_RF_PROTOCOL_T2T) /* T2T MifareUL */ + nfc_tgt.supported_protocols = NFC_PROTO_MIFARE_MASK; + else if (ntf->rf_protocol == NCI_RF_PROTOCOL_ISO_DEP) /* 4A */ + nfc_tgt.supported_protocols = NFC_PROTO_ISO14443_MASK; + else + nfc_tgt.supported_protocols = 0; + + nfc_tgt.sens_res = ntf->rf_tech_specific_params.nfca_poll.sens_res; + nfc_tgt.sel_res = ntf->rf_tech_specific_params.nfca_poll.sel_res; + nfc_tgt.nfcid1_len = ntf->rf_tech_specific_params.nfca_poll.nfcid1_len; + if (nfc_tgt.nfcid1_len > 0) { + memcpy(nfc_tgt.nfcid1, + ntf->rf_tech_specific_params.nfca_poll.nfcid1, + nfc_tgt.nfcid1_len); + } + + if (!(nfc_tgt.supported_protocols & ndev->poll_prots)) { + pr_debug("the target found does not have the desired protocol\n"); + return; + } + + pr_debug("new target found, supported_protocols 0x%x\n", + nfc_tgt.supported_protocols); + + ndev->target_available_prots = nfc_tgt.supported_protocols; + ndev->max_data_pkt_payload_size = ntf->max_data_pkt_payload_size; + ndev->initial_num_credits = ntf->initial_num_credits; + + /* set the available credits to initial value */ + atomic_set(&ndev->credits_cnt, ndev->initial_num_credits); + + nfc_targets_found(ndev->nfc_dev, &nfc_tgt, 1); +} + +static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_rf_intf_activated_ntf ntf; + __u8 *data = skb->data; + int err = 0; + + clear_bit(NCI_DISCOVERY, &ndev->flags); + set_bit(NCI_POLL_ACTIVE, &ndev->flags); + + ntf.rf_discovery_id = *data++; + ntf.rf_interface = *data++; + ntf.rf_protocol = *data++; + ntf.activation_rf_tech_and_mode = *data++; + ntf.max_data_pkt_payload_size = *data++; + ntf.initial_num_credits = *data++; + ntf.rf_tech_specific_params_len = *data++; + + pr_debug("rf_discovery_id %d\n", ntf.rf_discovery_id); + pr_debug("rf_interface 0x%x\n", ntf.rf_interface); + pr_debug("rf_protocol 0x%x\n", ntf.rf_protocol); + pr_debug("activation_rf_tech_and_mode 0x%x\n", + ntf.activation_rf_tech_and_mode); + pr_debug("max_data_pkt_payload_size 0x%x\n", + ntf.max_data_pkt_payload_size); + pr_debug("initial_num_credits 0x%x\n", ntf.initial_num_credits); + pr_debug("rf_tech_specific_params_len %d\n", + ntf.rf_tech_specific_params_len); + + if (ntf.rf_tech_specific_params_len > 0) { + switch (ntf.activation_rf_tech_and_mode) { + case NCI_NFC_A_PASSIVE_POLL_MODE: + data = nci_extract_rf_params_nfca_passive_poll(ndev, + &ntf, data); + break; + + default: + pr_err("unsupported activation_rf_tech_and_mode 0x%x\n", + ntf.activation_rf_tech_and_mode); + return; + } + } + + ntf.data_exch_rf_tech_and_mode = *data++; + ntf.data_exch_tx_bit_rate = *data++; + ntf.data_exch_rx_bit_rate = *data++; + ntf.activation_params_len = *data++; + + pr_debug("data_exch_rf_tech_and_mode 0x%x\n", + ntf.data_exch_rf_tech_and_mode); + pr_debug("data_exch_tx_bit_rate 0x%x\n", + ntf.data_exch_tx_bit_rate); + pr_debug("data_exch_rx_bit_rate 0x%x\n", + ntf.data_exch_rx_bit_rate); + pr_debug("activation_params_len %d\n", + ntf.activation_params_len); + + if (ntf.activation_params_len > 0) { + switch (ntf.rf_interface) { + case NCI_RF_INTERFACE_ISO_DEP: + err = nci_extract_activation_params_iso_dep(ndev, + &ntf, data); + break; + + case NCI_RF_INTERFACE_FRAME: + /* no activation params */ + break; + + default: + pr_err("unsupported rf_interface 0x%x\n", + ntf.rf_interface); + return; + } + } + + if (!err) + nci_target_found(ndev, &ntf); +} + +static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_rf_deactivate_ntf *ntf = (void *) skb->data; + + pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason); + + clear_bit(NCI_POLL_ACTIVE, &ndev->flags); + ndev->target_active_prot = 0; + + /* drop tx data queue */ + skb_queue_purge(&ndev->tx_q); + + /* drop partial rx data packet */ + if (ndev->rx_data_reassembly) { + kfree_skb(ndev->rx_data_reassembly); + ndev->rx_data_reassembly = 0; + } + + /* complete the data exchange transaction, if exists */ + if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) + nci_data_exchange_complete(ndev, NULL, -EIO); +} + +void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) +{ + __u16 ntf_opcode = nci_opcode(skb->data); + + pr_debug("NCI RX: MT=ntf, PBF=%d, GID=0x%x, OID=0x%x, plen=%d\n", + nci_pbf(skb->data), + nci_opcode_gid(ntf_opcode), + nci_opcode_oid(ntf_opcode), + nci_plen(skb->data)); + + /* strip the nci control header */ + skb_pull(skb, NCI_CTRL_HDR_SIZE); + + switch (ntf_opcode) { + case NCI_OP_CORE_CONN_CREDITS_NTF: + nci_core_conn_credits_ntf_packet(ndev, skb); + break; + + case NCI_OP_CORE_INTF_ERROR_NTF: + nci_core_conn_intf_error_ntf_packet(ndev, skb); + break; + + case NCI_OP_RF_INTF_ACTIVATED_NTF: + nci_rf_intf_activated_ntf_packet(ndev, skb); + break; + + case NCI_OP_RF_DEACTIVATE_NTF: + nci_rf_deactivate_ntf_packet(ndev, skb); + break; + + default: + pr_err("unknown ntf opcode 0x%x\n", ntf_opcode); + break; + } + + kfree_skb(skb); +} diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c new file mode 100644 index 000000000000..2840ae2f3615 --- /dev/null +++ b/net/nfc/nci/rsp.c @@ -0,0 +1,205 @@ +/* + * The NFC Controller Interface is the communication protocol between an + * NFC Controller (NFCC) and a Device Host (DH). + * + * Copyright (C) 2011 Texas Instruments, Inc. + * + * Written by Ilan Elias <ilane@ti.com> + * + * Acknowledgements: + * This file is based on hci_event.c, which was written + * by Maxim Krasnyansky. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/bitops.h> +#include <linux/skbuff.h> + +#include "../nfc.h" +#include <net/nfc/nci.h> +#include <net/nfc/nci_core.h> + +/* Handle NCI Response packets */ + +static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +{ + struct nci_core_reset_rsp *rsp = (void *) skb->data; + + pr_debug("status 0x%x\n", rsp->status); + + if (rsp->status == NCI_STATUS_OK) { + ndev->nci_ver = rsp->nci_ver; + pr_debug("nci_ver 0x%x, config_status 0x%x\n", + rsp->nci_ver, rsp->config_status); + } + + nci_req_complete(ndev, rsp->status); +} + +static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +{ + struct nci_core_init_rsp_1 *rsp_1 = (void *) skb->data; + struct nci_core_init_rsp_2 *rsp_2; + + pr_debug("status 0x%x\n", rsp_1->status); + + if (rsp_1->status != NCI_STATUS_OK) + goto exit; + + ndev->nfcc_features = __le32_to_cpu(rsp_1->nfcc_features); + ndev->num_supported_rf_interfaces = rsp_1->num_supported_rf_interfaces; + + if (ndev->num_supported_rf_interfaces > + NCI_MAX_SUPPORTED_RF_INTERFACES) { + ndev->num_supported_rf_interfaces = + NCI_MAX_SUPPORTED_RF_INTERFACES; + } + + memcpy(ndev->supported_rf_interfaces, + rsp_1->supported_rf_interfaces, + ndev->num_supported_rf_interfaces); + + rsp_2 = (void *) (skb->data + 6 + rsp_1->num_supported_rf_interfaces); + + ndev->max_logical_connections = + rsp_2->max_logical_connections; + ndev->max_routing_table_size = + __le16_to_cpu(rsp_2->max_routing_table_size); + ndev->max_ctrl_pkt_payload_len = + rsp_2->max_ctrl_pkt_payload_len; + ndev->max_size_for_large_params = + __le16_to_cpu(rsp_2->max_size_for_large_params); + ndev->manufact_id = + rsp_2->manufact_id; + ndev->manufact_specific_info = + __le32_to_cpu(rsp_2->manufact_specific_info); + + pr_debug("nfcc_features 0x%x\n", + ndev->nfcc_features); + pr_debug("num_supported_rf_interfaces %d\n", + ndev->num_supported_rf_interfaces); + pr_debug("supported_rf_interfaces[0] 0x%x\n", + ndev->supported_rf_interfaces[0]); + pr_debug("supported_rf_interfaces[1] 0x%x\n", + ndev->supported_rf_interfaces[1]); + pr_debug("supported_rf_interfaces[2] 0x%x\n", + ndev->supported_rf_interfaces[2]); + pr_debug("supported_rf_interfaces[3] 0x%x\n", + ndev->supported_rf_interfaces[3]); + pr_debug("max_logical_connections %d\n", + ndev->max_logical_connections); + pr_debug("max_routing_table_size %d\n", + ndev->max_routing_table_size); + pr_debug("max_ctrl_pkt_payload_len %d\n", + ndev->max_ctrl_pkt_payload_len); + pr_debug("max_size_for_large_params %d\n", + ndev->max_size_for_large_params); + pr_debug("manufact_id 0x%x\n", + ndev->manufact_id); + pr_debug("manufact_specific_info 0x%x\n", + ndev->manufact_specific_info); + +exit: + nci_req_complete(ndev, rsp_1->status); +} + +static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + + nci_req_complete(ndev, status); +} + +static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + + if (status == NCI_STATUS_OK) + set_bit(NCI_DISCOVERY, &ndev->flags); + + nci_req_complete(ndev, status); +} + +static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + + clear_bit(NCI_DISCOVERY, &ndev->flags); + + nci_req_complete(ndev, status); +} + +void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) +{ + __u16 rsp_opcode = nci_opcode(skb->data); + + /* we got a rsp, stop the cmd timer */ + del_timer(&ndev->cmd_timer); + + pr_debug("NCI RX: MT=rsp, PBF=%d, GID=0x%x, OID=0x%x, plen=%d\n", + nci_pbf(skb->data), + nci_opcode_gid(rsp_opcode), + nci_opcode_oid(rsp_opcode), + nci_plen(skb->data)); + + /* strip the nci control header */ + skb_pull(skb, NCI_CTRL_HDR_SIZE); + + switch (rsp_opcode) { + case NCI_OP_CORE_RESET_RSP: + nci_core_reset_rsp_packet(ndev, skb); + break; + + case NCI_OP_CORE_INIT_RSP: + nci_core_init_rsp_packet(ndev, skb); + break; + + case NCI_OP_RF_DISCOVER_MAP_RSP: + nci_rf_disc_map_rsp_packet(ndev, skb); + break; + + case NCI_OP_RF_DISCOVER_RSP: + nci_rf_disc_rsp_packet(ndev, skb); + break; + + case NCI_OP_RF_DEACTIVATE_RSP: + nci_rf_deactivate_rsp_packet(ndev, skb); + break; + + default: + pr_err("unknown rsp opcode 0x%x\n", rsp_opcode); + break; + } + + kfree_skb(skb); + + /* trigger the next cmd */ + atomic_set(&ndev->cmd_cnt, 1); + if (!skb_queue_empty(&ndev->cmd_q)) + queue_work(ndev->cmd_wq, &ndev->cmd_work); +} diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index ccdff7953f7d..6989dfa28ee2 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -21,6 +21,8 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ + #include <net/genetlink.h> #include <linux/nfc.h> #include <linux/slab.h> @@ -44,6 +46,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, .len = NFC_DEVICE_NAME_MAXSIZE }, [NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 }, + [NFC_ATTR_COMM_MODE] = { .type = NLA_U8 }, + [NFC_ATTR_RF_MODE] = { .type = NLA_U8 }, }; static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, @@ -51,8 +55,6 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, { void *hdr; - nfc_dbg("entry"); - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, &nfc_genl_family, flags, NFC_CMD_GET_TARGET); if (!hdr) @@ -65,6 +67,9 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, target->supported_protocols); NLA_PUT_U16(msg, NFC_ATTR_TARGET_SENS_RES, target->sens_res); NLA_PUT_U8(msg, NFC_ATTR_TARGET_SEL_RES, target->sel_res); + if (target->nfcid1_len > 0) + NLA_PUT(msg, NFC_ATTR_TARGET_NFCID1, target->nfcid1_len, + target->nfcid1); return genlmsg_end(msg, hdr); @@ -105,8 +110,6 @@ static int nfc_genl_dump_targets(struct sk_buff *skb, struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; int rc; - nfc_dbg("entry"); - if (!dev) { dev = __get_device_from_cb(cb); if (IS_ERR(dev)) @@ -139,8 +142,6 @@ static int nfc_genl_dump_targets_done(struct netlink_callback *cb) { struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; - nfc_dbg("entry"); - if (dev) nfc_put_device(dev); @@ -152,8 +153,6 @@ int nfc_genl_targets_found(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - nfc_dbg("entry"); - dev->genl_data.poll_req_pid = 0; msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); @@ -183,8 +182,6 @@ int nfc_genl_device_added(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - nfc_dbg("entry"); - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -216,8 +213,6 @@ int nfc_genl_device_removed(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - nfc_dbg("entry"); - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -249,8 +244,6 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, { void *hdr; - nfc_dbg("entry"); - hdr = genlmsg_put(msg, pid, seq, &nfc_genl_family, flags, NFC_CMD_GET_DEVICE); if (!hdr) @@ -277,8 +270,6 @@ static int nfc_genl_dump_devices(struct sk_buff *skb, struct nfc_dev *dev = (struct nfc_dev *) cb->args[1]; bool first_call = false; - nfc_dbg("entry"); - if (!iter) { first_call = true; iter = kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL); @@ -319,14 +310,81 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_dbg("entry"); - nfc_device_iter_exit(iter); kfree(iter); return 0; } +int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, + u8 comm_mode, u8 rf_mode) +{ + struct sk_buff *msg; + void *hdr; + + pr_debug("DEP link is up\n"); + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_CMD_DEP_LINK_UP); + if (!hdr) + goto free_msg; + + NLA_PUT_U32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx); + if (rf_mode == NFC_RF_INITIATOR) + NLA_PUT_U32(msg, NFC_ATTR_TARGET_INDEX, target_idx); + NLA_PUT_U8(msg, NFC_ATTR_COMM_MODE, comm_mode); + NLA_PUT_U8(msg, NFC_ATTR_RF_MODE, rf_mode); + + genlmsg_end(msg, hdr); + + dev->dep_link_up = true; + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + +int nfc_genl_dep_link_down_event(struct nfc_dev *dev) +{ + struct sk_buff *msg; + void *hdr; + + pr_debug("DEP link is down\n"); + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_CMD_DEP_LINK_DOWN); + if (!hdr) + goto free_msg; + + NLA_PUT_U32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx); + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; @@ -334,8 +392,6 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) u32 idx; int rc = -ENOBUFS; - nfc_dbg("entry"); - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; @@ -367,6 +423,48 @@ out_putdev: return rc; } +static int nfc_genl_dev_up(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_dev_up(dev); + + nfc_put_device(dev); + return rc; +} + +static int nfc_genl_dev_down(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_dev_down(dev); + + nfc_put_device(dev); + return rc; +} + static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; @@ -374,7 +472,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) u32 idx; u32 protocols; - nfc_dbg("entry"); + pr_debug("Poll start\n"); if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_PROTOCOLS]) @@ -405,8 +503,6 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) int rc; u32 idx; - nfc_dbg("entry"); - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) return -EINVAL; @@ -432,6 +528,67 @@ out: return rc; } +static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc, tgt_idx; + u32 idx; + u8 comm, rf; + + pr_debug("DEP link up\n"); + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_COMM_MODE] || + !info->attrs[NFC_ATTR_RF_MODE]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + if (!info->attrs[NFC_ATTR_TARGET_INDEX]) + tgt_idx = NFC_TARGET_IDX_ANY; + else + tgt_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); + + comm = nla_get_u8(info->attrs[NFC_ATTR_COMM_MODE]); + rf = nla_get_u8(info->attrs[NFC_ATTR_RF_MODE]); + + if (comm != NFC_COMM_ACTIVE && comm != NFC_COMM_PASSIVE) + return -EINVAL; + + if (rf != NFC_RF_INITIATOR && comm != NFC_RF_TARGET) + return -EINVAL; + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_dep_link_up(dev, tgt_idx, comm, rf); + + nfc_put_device(dev); + + return rc; +} + +static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_dep_link_down(dev); + + nfc_put_device(dev); + return rc; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -441,6 +598,16 @@ static struct genl_ops nfc_genl_ops[] = { .policy = nfc_genl_policy, }, { + .cmd = NFC_CMD_DEV_UP, + .doit = nfc_genl_dev_up, + .policy = nfc_genl_policy, + }, + { + .cmd = NFC_CMD_DEV_DOWN, + .doit = nfc_genl_dev_down, + .policy = nfc_genl_policy, + }, + { .cmd = NFC_CMD_START_POLL, .doit = nfc_genl_start_poll, .policy = nfc_genl_policy, @@ -451,6 +618,16 @@ static struct genl_ops nfc_genl_ops[] = { .policy = nfc_genl_policy, }, { + .cmd = NFC_CMD_DEP_LINK_UP, + .doit = nfc_genl_dep_link_up, + .policy = nfc_genl_policy, + }, + { + .cmd = NFC_CMD_DEP_LINK_DOWN, + .doit = nfc_genl_dep_link_down, + .policy = nfc_genl_policy, + }, + { .cmd = NFC_CMD_GET_TARGET, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, @@ -468,18 +645,16 @@ static int nfc_genl_rcv_nl_event(struct notifier_block *this, if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) goto out; - nfc_dbg("NETLINK_URELEASE event from id %d", n->pid); + pr_debug("NETLINK_URELEASE event from id %d\n", n->pid); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { - mutex_lock(&dev->genl_data.genl_data_mutex); if (dev->genl_data.poll_req_pid == n->pid) { nfc_stop_poll(dev); dev->genl_data.poll_req_pid = 0; } - mutex_unlock(&dev->genl_data.genl_data_mutex); dev = nfc_device_iter_next(&iter); } diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index aaf9832298f3..6d28d75995b0 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -24,16 +24,9 @@ #ifndef __LOCAL_NFC_H #define __LOCAL_NFC_H -#include <net/nfc.h> +#include <net/nfc/nfc.h> #include <net/sock.h> -__attribute__((format (printf, 2, 3))) -int nfc_printk(const char *level, const char *fmt, ...); - -#define nfc_info(fmt, arg...) nfc_printk(KERN_INFO, fmt, ##arg) -#define nfc_err(fmt, arg...) nfc_printk(KERN_ERR, fmt, ##arg) -#define nfc_dbg(fmt, arg...) pr_debug(fmt "\n", ##arg) - struct nfc_protocol { int id; struct proto *proto; @@ -53,6 +46,60 @@ struct nfc_rawsock { #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) +#ifdef CONFIG_NFC_LLCP + +void nfc_llcp_mac_is_down(struct nfc_dev *dev); +void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, + u8 comm_mode, u8 rf_mode); +int nfc_llcp_register_device(struct nfc_dev *dev); +void nfc_llcp_unregister_device(struct nfc_dev *dev); +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len); +u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, u8 *general_bytes_len); +int __init nfc_llcp_init(void); +void nfc_llcp_exit(void); + +#else + +static inline void nfc_llcp_mac_is_down(struct nfc_dev *dev) +{ +} + +static inline void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, + u8 comm_mode, u8 rf_mode) +{ +} + +static inline int nfc_llcp_register_device(struct nfc_dev *dev) +{ + return 0; +} + +static inline void nfc_llcp_unregister_device(struct nfc_dev *dev) +{ +} + +static inline int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) +{ + return 0; +} + +static inline u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, u8 *gb_len) +{ + *gb_len = 0; + return NULL; +} + +static inline int nfc_llcp_init(void) +{ + return 0; +} + +static inline void nfc_llcp_exit(void) +{ +} + +#endif + int __init rawsock_init(void); void rawsock_exit(void); @@ -75,6 +122,10 @@ int nfc_genl_targets_found(struct nfc_dev *dev); int nfc_genl_device_added(struct nfc_dev *dev); int nfc_genl_device_removed(struct nfc_dev *dev); +int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, + u8 comm_mode, u8 rf_mode); +int nfc_genl_dep_link_down_event(struct nfc_dev *dev); + struct nfc_dev *nfc_get_device(unsigned idx); static inline void nfc_put_device(struct nfc_dev *dev) @@ -101,10 +152,19 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter) class_dev_iter_exit(iter); } +int nfc_dev_up(struct nfc_dev *dev); + +int nfc_dev_down(struct nfc_dev *dev); + int nfc_start_poll(struct nfc_dev *dev, u32 protocols); int nfc_stop_poll(struct nfc_dev *dev); +int nfc_dep_link_up(struct nfc_dev *dev, int target_idx, + u8 comm_mode, u8 rf_mode); + +int nfc_dep_link_down(struct nfc_dev *dev); + int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol); int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx); diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 52de84a55115..2e2f8c6a61fe 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -21,14 +21,17 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__ + #include <net/tcp_states.h> #include <linux/nfc.h> +#include <linux/export.h> #include "nfc.h" static void rawsock_write_queue_purge(struct sock *sk) { - nfc_dbg("sk=%p", sk); + pr_debug("sk=%p\n", sk); spin_lock_bh(&sk->sk_write_queue.lock); __skb_queue_purge(&sk->sk_write_queue); @@ -38,7 +41,7 @@ static void rawsock_write_queue_purge(struct sock *sk) static void rawsock_report_error(struct sock *sk, int err) { - nfc_dbg("sk=%p err=%d", sk, err); + pr_debug("sk=%p err=%d\n", sk, err); sk->sk_shutdown = SHUTDOWN_MASK; sk->sk_err = -err; @@ -51,7 +54,7 @@ static int rawsock_release(struct socket *sock) { struct sock *sk = sock->sk; - nfc_dbg("sock=%p", sock); + pr_debug("sock=%p\n", sock); sock_orphan(sk); sock_put(sk); @@ -67,14 +70,14 @@ static int rawsock_connect(struct socket *sock, struct sockaddr *_addr, struct nfc_dev *dev; int rc = 0; - nfc_dbg("sock=%p sk=%p flags=%d", sock, sk, flags); + pr_debug("sock=%p sk=%p flags=%d\n", sock, sk, flags); if (!addr || len < sizeof(struct sockaddr_nfc) || addr->sa_family != AF_NFC) return -EINVAL; - nfc_dbg("addr dev_idx=%u target_idx=%u protocol=%u", addr->dev_idx, - addr->target_idx, addr->nfc_protocol); + pr_debug("addr dev_idx=%u target_idx=%u protocol=%u\n", + addr->dev_idx, addr->target_idx, addr->nfc_protocol); lock_sock(sk); @@ -123,11 +126,7 @@ error: static int rawsock_add_header(struct sk_buff *skb) { - - if (skb_cow_head(skb, 1)) - return -ENOMEM; - - *skb_push(skb, 1) = 0; + *skb_push(skb, NFC_HEADER_SIZE) = 0; return 0; } @@ -139,7 +138,7 @@ static void rawsock_data_exchange_complete(void *context, struct sk_buff *skb, BUG_ON(in_irq()); - nfc_dbg("sk=%p err=%d", sk, err); + pr_debug("sk=%p err=%d\n", sk, err); if (err) goto error; @@ -175,7 +174,7 @@ static void rawsock_tx_work(struct work_struct *work) struct sk_buff *skb; int rc; - nfc_dbg("sk=%p target_idx=%u", sk, target_idx); + pr_debug("sk=%p target_idx=%u\n", sk, target_idx); if (sk->sk_shutdown & SEND_SHUTDOWN) { rawsock_write_queue_purge(sk); @@ -197,10 +196,11 @@ static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; + struct nfc_dev *dev = nfc_rawsock(sk)->dev; struct sk_buff *skb; int rc; - nfc_dbg("sock=%p sk=%p len=%zu", sock, sk, len); + pr_debug("sock=%p sk=%p len=%zu\n", sock, sk, len); if (msg->msg_namelen) return -EOPNOTSUPP; @@ -208,9 +208,8 @@ static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, if (sock->state != SS_CONNECTED) return -ENOTCONN; - skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, - &rc); - if (!skb) + skb = nfc_alloc_send_skb(dev, sk, msg->msg_flags, len, &rc); + if (skb == NULL) return rc; rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); @@ -239,7 +238,7 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, int copied; int rc; - nfc_dbg("sock=%p sk=%p len=%zu flags=%d", sock, sk, len, flags); + pr_debug("sock=%p sk=%p len=%zu flags=%d\n", sock, sk, len, flags); skb = skb_recv_datagram(sk, flags, noblock, &rc); if (!skb) @@ -283,7 +282,7 @@ static const struct proto_ops rawsock_ops = { static void rawsock_destruct(struct sock *sk) { - nfc_dbg("sk=%p", sk); + pr_debug("sk=%p\n", sk); if (sk->sk_state == TCP_ESTABLISHED) { nfc_deactivate_target(nfc_rawsock(sk)->dev, @@ -294,7 +293,7 @@ static void rawsock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { - nfc_err("Freeing alive NFC raw socket %p", sk); + pr_err("Freeing alive NFC raw socket %p\n", sk); return; } } @@ -304,14 +303,14 @@ static int rawsock_create(struct net *net, struct socket *sock, { struct sock *sk; - nfc_dbg("sock=%p", sock); + pr_debug("sock=%p\n", sock); if (sock->type != SOCK_SEQPACKET) return -ESOCKTNOSUPPORT; sock->ops = &rawsock_ops; - sk = sk_alloc(net, PF_NFC, GFP_KERNEL, nfc_proto->proto); + sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); if (!sk) return -ENOMEM; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig new file mode 100644 index 000000000000..d9ea33c361be --- /dev/null +++ b/net/openvswitch/Kconfig @@ -0,0 +1,28 @@ +# +# Open vSwitch +# + +config OPENVSWITCH + tristate "Open vSwitch" + ---help--- + Open vSwitch is a multilayer Ethernet switch targeted at virtualized + environments. In addition to supporting a variety of features + expected in a traditional hardware switch, it enables fine-grained + programmatic extension and flow-based control of the network. This + control is useful in a wide variety of applications but is + particularly important in multi-server virtualization deployments, + which are often characterized by highly dynamic endpoints and the + need to maintain logical abstractions for multiple tenants. + + The Open vSwitch datapath provides an in-kernel fast path for packet + forwarding. It is complemented by a userspace daemon, ovs-vswitchd, + which is able to accept configuration from a variety of sources and + translate it into packet processing rules. + + See http://openvswitch.org for more information and userspace + utilities. + + To compile this code as a module, choose M here: the module will be + called openvswitch. + + If unsure, say N. diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile new file mode 100644 index 000000000000..15e7384745c1 --- /dev/null +++ b/net/openvswitch/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for Open vSwitch. +# + +obj-$(CONFIG_OPENVSWITCH) += openvswitch.o + +openvswitch-y := \ + actions.o \ + datapath.o \ + dp_notify.o \ + flow.o \ + vport.o \ + vport-internal_dev.o \ + vport-netdev.o \ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c new file mode 100644 index 000000000000..2725d1bdf291 --- /dev/null +++ b/net/openvswitch/actions.c @@ -0,0 +1,415 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/openvswitch.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/in6.h> +#include <linux/if_arp.h> +#include <linux/if_vlan.h> +#include <net/ip.h> +#include <net/checksum.h> +#include <net/dsfield.h> + +#include "datapath.h" +#include "vport.h" + +static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr, int len, bool keep_skb); + +static int make_writable(struct sk_buff *skb, int write_len) +{ + if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) + return 0; + + return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} + +/* remove VLAN header from packet and update csum accrodingly. */ +static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) +{ + struct vlan_hdr *vhdr; + int err; + + err = make_writable(skb, VLAN_ETH_HLEN); + if (unlikely(err)) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_sub(skb->csum, csum_partial(skb->data + + ETH_HLEN, VLAN_HLEN, 0)); + + vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); + *current_tci = vhdr->h_vlan_TCI; + + memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); + __skb_pull(skb, VLAN_HLEN); + + vlan_set_encap_proto(skb, vhdr); + skb->mac_header += VLAN_HLEN; + skb_reset_mac_len(skb); + + return 0; +} + +static int pop_vlan(struct sk_buff *skb) +{ + __be16 tci; + int err; + + if (likely(vlan_tx_tag_present(skb))) { + skb->vlan_tci = 0; + } else { + if (unlikely(skb->protocol != htons(ETH_P_8021Q) || + skb->len < VLAN_ETH_HLEN)) + return 0; + + err = __pop_vlan_tci(skb, &tci); + if (err) + return err; + } + /* move next vlan tag to hw accel tag */ + if (likely(skb->protocol != htons(ETH_P_8021Q) || + skb->len < VLAN_ETH_HLEN)) + return 0; + + err = __pop_vlan_tci(skb, &tci); + if (unlikely(err)) + return err; + + __vlan_hwaccel_put_tag(skb, ntohs(tci)); + return 0; +} + +static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan) +{ + if (unlikely(vlan_tx_tag_present(skb))) { + u16 current_tag; + + /* push down current VLAN tag */ + current_tag = vlan_tx_tag_get(skb); + + if (!__vlan_put_tag(skb, current_tag)) + return -ENOMEM; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(skb->data + + ETH_HLEN, VLAN_HLEN, 0)); + + } + __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); + return 0; +} + +static int set_eth_addr(struct sk_buff *skb, + const struct ovs_key_ethernet *eth_key) +{ + int err; + err = make_writable(skb, ETH_HLEN); + if (unlikely(err)) + return err; + + memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); + memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); + + return 0; +} + +static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, + __be32 *addr, __be32 new_addr) +{ + int transport_len = skb->len - skb_transport_offset(skb); + + if (nh->protocol == IPPROTO_TCP) { + if (likely(transport_len >= sizeof(struct tcphdr))) + inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb, + *addr, new_addr, 1); + } else if (nh->protocol == IPPROTO_UDP) { + if (likely(transport_len >= sizeof(struct udphdr))) + inet_proto_csum_replace4(&udp_hdr(skb)->check, skb, + *addr, new_addr, 1); + } + + csum_replace4(&nh->check, *addr, new_addr); + skb->rxhash = 0; + *addr = new_addr; +} + +static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) +{ + csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); + nh->ttl = new_ttl; +} + +static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) +{ + struct iphdr *nh; + int err; + + err = make_writable(skb, skb_network_offset(skb) + + sizeof(struct iphdr)); + if (unlikely(err)) + return err; + + nh = ip_hdr(skb); + + if (ipv4_key->ipv4_src != nh->saddr) + set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); + + if (ipv4_key->ipv4_dst != nh->daddr) + set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); + + if (ipv4_key->ipv4_tos != nh->tos) + ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); + + if (ipv4_key->ipv4_ttl != nh->ttl) + set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); + + return 0; +} + +/* Must follow make_writable() since that can move the skb data. */ +static void set_tp_port(struct sk_buff *skb, __be16 *port, + __be16 new_port, __sum16 *check) +{ + inet_proto_csum_replace2(check, skb, *port, new_port, 0); + *port = new_port; + skb->rxhash = 0; +} + +static int set_udp_port(struct sk_buff *skb, + const struct ovs_key_udp *udp_port_key) +{ + struct udphdr *uh; + int err; + + err = make_writable(skb, skb_transport_offset(skb) + + sizeof(struct udphdr)); + if (unlikely(err)) + return err; + + uh = udp_hdr(skb); + if (udp_port_key->udp_src != uh->source) + set_tp_port(skb, &uh->source, udp_port_key->udp_src, &uh->check); + + if (udp_port_key->udp_dst != uh->dest) + set_tp_port(skb, &uh->dest, udp_port_key->udp_dst, &uh->check); + + return 0; +} + +static int set_tcp_port(struct sk_buff *skb, + const struct ovs_key_tcp *tcp_port_key) +{ + struct tcphdr *th; + int err; + + err = make_writable(skb, skb_transport_offset(skb) + + sizeof(struct tcphdr)); + if (unlikely(err)) + return err; + + th = tcp_hdr(skb); + if (tcp_port_key->tcp_src != th->source) + set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); + + if (tcp_port_key->tcp_dst != th->dest) + set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); + + return 0; +} + +static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) +{ + struct vport *vport; + + if (unlikely(!skb)) + return -ENOMEM; + + vport = rcu_dereference(dp->ports[out_port]); + if (unlikely(!vport)) { + kfree_skb(skb); + return -ENODEV; + } + + ovs_vport_send(vport, skb); + return 0; +} + +static int output_userspace(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{ + struct dp_upcall_info upcall; + const struct nlattr *a; + int rem; + + upcall.cmd = OVS_PACKET_CMD_ACTION; + upcall.key = &OVS_CB(skb)->flow->key; + upcall.userdata = NULL; + upcall.pid = 0; + + for (a = nla_data(attr), rem = nla_len(attr); rem > 0; + a = nla_next(a, &rem)) { + switch (nla_type(a)) { + case OVS_USERSPACE_ATTR_USERDATA: + upcall.userdata = a; + break; + + case OVS_USERSPACE_ATTR_PID: + upcall.pid = nla_get_u32(a); + break; + } + } + + return ovs_dp_upcall(dp, skb, &upcall); +} + +static int sample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{ + const struct nlattr *acts_list = NULL; + const struct nlattr *a; + int rem; + + for (a = nla_data(attr), rem = nla_len(attr); rem > 0; + a = nla_next(a, &rem)) { + switch (nla_type(a)) { + case OVS_SAMPLE_ATTR_PROBABILITY: + if (net_random() >= nla_get_u32(a)) + return 0; + break; + + case OVS_SAMPLE_ATTR_ACTIONS: + acts_list = a; + break; + } + } + + return do_execute_actions(dp, skb, nla_data(acts_list), + nla_len(acts_list), true); +} + +static int execute_set_action(struct sk_buff *skb, + const struct nlattr *nested_attr) +{ + int err = 0; + + switch (nla_type(nested_attr)) { + case OVS_KEY_ATTR_PRIORITY: + skb->priority = nla_get_u32(nested_attr); + break; + + case OVS_KEY_ATTR_ETHERNET: + err = set_eth_addr(skb, nla_data(nested_attr)); + break; + + case OVS_KEY_ATTR_IPV4: + err = set_ipv4(skb, nla_data(nested_attr)); + break; + + case OVS_KEY_ATTR_TCP: + err = set_tcp_port(skb, nla_data(nested_attr)); + break; + + case OVS_KEY_ATTR_UDP: + err = set_udp_port(skb, nla_data(nested_attr)); + break; + } + + return err; +} + +/* Execute a list of actions against 'skb'. */ +static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr, int len, bool keep_skb) +{ + /* Every output action needs a separate clone of 'skb', but the common + * case is just a single output action, so that doing a clone and + * then freeing the original skbuff is wasteful. So the following code + * is slightly obscure just to avoid that. */ + int prev_port = -1; + const struct nlattr *a; + int rem; + + for (a = attr, rem = len; rem > 0; + a = nla_next(a, &rem)) { + int err = 0; + + if (prev_port != -1) { + do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port); + prev_port = -1; + } + + switch (nla_type(a)) { + case OVS_ACTION_ATTR_OUTPUT: + prev_port = nla_get_u32(a); + break; + + case OVS_ACTION_ATTR_USERSPACE: + output_userspace(dp, skb, a); + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + err = push_vlan(skb, nla_data(a)); + if (unlikely(err)) /* skb already freed. */ + return err; + break; + + case OVS_ACTION_ATTR_POP_VLAN: + err = pop_vlan(skb); + break; + + case OVS_ACTION_ATTR_SET: + err = execute_set_action(skb, nla_data(a)); + break; + + case OVS_ACTION_ATTR_SAMPLE: + err = sample(dp, skb, a); + break; + } + + if (unlikely(err)) { + kfree_skb(skb); + return err; + } + } + + if (prev_port != -1) { + if (keep_skb) + skb = skb_clone(skb, GFP_ATOMIC); + + do_output(dp, skb, prev_port); + } else if (!keep_skb) + consume_skb(skb); + + return 0; +} + +/* Execute a list of actions against 'skb'. */ +int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) +{ + struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); + + return do_execute_actions(dp, skb, acts->actions, + acts->actions_len, false); +} diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c new file mode 100644 index 000000000000..9a2725114e99 --- /dev/null +++ b/net/openvswitch/datapath.c @@ -0,0 +1,1912 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/if_arp.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/jhash.h> +#include <linux/delay.h> +#include <linux/time.h> +#include <linux/etherdevice.h> +#include <linux/genetlink.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/rcupdate.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/version.h> +#include <linux/ethtool.h> +#include <linux/wait.h> +#include <asm/system.h> +#include <asm/div64.h> +#include <linux/highmem.h> +#include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv4.h> +#include <linux/inetdevice.h> +#include <linux/list.h> +#include <linux/openvswitch.h> +#include <linux/rculist.h> +#include <linux/dmi.h> +#include <linux/workqueue.h> +#include <net/genetlink.h> + +#include "datapath.h" +#include "flow.h" +#include "vport-internal_dev.h" + +/** + * DOC: Locking: + * + * Writes to device state (add/remove datapath, port, set operations on vports, + * etc.) are protected by RTNL. + * + * Writes to other state (flow table modifications, set miscellaneous datapath + * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside + * genl_mutex. + * + * Reads are protected by RCU. + * + * There are a few special cases (mostly stats) that have their own + * synchronization but they nest under all of above and don't interact with + * each other. + */ + +/* Global list of datapaths to enable dumping them all out. + * Protected by genl_mutex. + */ +static LIST_HEAD(dps); + +#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) +static void rehash_flow_table(struct work_struct *work); +static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); + +static struct vport *new_vport(const struct vport_parms *); +static int queue_gso_packets(int dp_ifindex, struct sk_buff *, + const struct dp_upcall_info *); +static int queue_userspace_packet(int dp_ifindex, struct sk_buff *, + const struct dp_upcall_info *); + +/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ +static struct datapath *get_dp(int dp_ifindex) +{ + struct datapath *dp = NULL; + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(&init_net, dp_ifindex); + if (dev) { + struct vport *vport = ovs_internal_dev_get_vport(dev); + if (vport) + dp = vport->dp; + } + rcu_read_unlock(); + + return dp; +} + +/* Must be called with rcu_read_lock or RTNL lock. */ +const char *ovs_dp_name(const struct datapath *dp) +{ + struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]); + return vport->ops->get_name(vport); +} + +static int get_dpifindex(struct datapath *dp) +{ + struct vport *local; + int ifindex; + + rcu_read_lock(); + + local = rcu_dereference(dp->ports[OVSP_LOCAL]); + if (local) + ifindex = local->ops->get_ifindex(local); + else + ifindex = 0; + + rcu_read_unlock(); + + return ifindex; +} + +static void destroy_dp_rcu(struct rcu_head *rcu) +{ + struct datapath *dp = container_of(rcu, struct datapath, rcu); + + ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); + free_percpu(dp->stats_percpu); + kfree(dp); +} + +/* Called with RTNL lock and genl_lock. */ +static struct vport *new_vport(const struct vport_parms *parms) +{ + struct vport *vport; + + vport = ovs_vport_add(parms); + if (!IS_ERR(vport)) { + struct datapath *dp = parms->dp; + + rcu_assign_pointer(dp->ports[parms->port_no], vport); + list_add(&vport->node, &dp->port_list); + } + + return vport; +} + +/* Called with RTNL lock. */ +void ovs_dp_detach_port(struct vport *p) +{ + ASSERT_RTNL(); + + /* First drop references to device. */ + list_del(&p->node); + rcu_assign_pointer(p->dp->ports[p->port_no], NULL); + + /* Then destroy it. */ + ovs_vport_del(p); +} + +/* Must be called with rcu_read_lock. */ +void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) +{ + struct datapath *dp = p->dp; + struct sw_flow *flow; + struct dp_stats_percpu *stats; + struct sw_flow_key key; + u64 *stats_counter; + int error; + int key_len; + + stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); + + /* Extract flow from 'skb' into 'key'. */ + error = ovs_flow_extract(skb, p->port_no, &key, &key_len); + if (unlikely(error)) { + kfree_skb(skb); + return; + } + + /* Look up flow. */ + flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len); + if (unlikely(!flow)) { + struct dp_upcall_info upcall; + + upcall.cmd = OVS_PACKET_CMD_MISS; + upcall.key = &key; + upcall.userdata = NULL; + upcall.pid = p->upcall_pid; + ovs_dp_upcall(dp, skb, &upcall); + consume_skb(skb); + stats_counter = &stats->n_missed; + goto out; + } + + OVS_CB(skb)->flow = flow; + + stats_counter = &stats->n_hit; + ovs_flow_used(OVS_CB(skb)->flow, skb); + ovs_execute_actions(dp, skb); + +out: + /* Update datapath statistics. */ + u64_stats_update_begin(&stats->sync); + (*stats_counter)++; + u64_stats_update_end(&stats->sync); +} + +static struct genl_family dp_packet_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_PACKET_FAMILY, + .version = OVS_PACKET_VERSION, + .maxattr = OVS_PACKET_ATTR_MAX +}; + +int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, + const struct dp_upcall_info *upcall_info) +{ + struct dp_stats_percpu *stats; + int dp_ifindex; + int err; + + if (upcall_info->pid == 0) { + err = -ENOTCONN; + goto err; + } + + dp_ifindex = get_dpifindex(dp); + if (!dp_ifindex) { + err = -ENODEV; + goto err; + } + + if (!skb_is_gso(skb)) + err = queue_userspace_packet(dp_ifindex, skb, upcall_info); + else + err = queue_gso_packets(dp_ifindex, skb, upcall_info); + if (err) + goto err; + + return 0; + +err: + stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); + + u64_stats_update_begin(&stats->sync); + stats->n_lost++; + u64_stats_update_end(&stats->sync); + + return err; +} + +static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, + const struct dp_upcall_info *upcall_info) +{ + struct dp_upcall_info later_info; + struct sw_flow_key later_key; + struct sk_buff *segs, *nskb; + int err; + + segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + /* Queue all of the segments. */ + skb = segs; + do { + err = queue_userspace_packet(dp_ifindex, skb, upcall_info); + if (err) + break; + + if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) { + /* The initial flow key extracted by ovs_flow_extract() + * in this case is for a first fragment, so we need to + * properly mark later fragments. + */ + later_key = *upcall_info->key; + later_key.ip.frag = OVS_FRAG_TYPE_LATER; + + later_info = *upcall_info; + later_info.key = &later_key; + upcall_info = &later_info; + } + } while ((skb = skb->next)); + + /* Free all of the segments. */ + skb = segs; + do { + nskb = skb->next; + if (err) + kfree_skb(skb); + else + consume_skb(skb); + } while ((skb = nskb)); + return err; +} + +static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb, + const struct dp_upcall_info *upcall_info) +{ + struct ovs_header *upcall; + struct sk_buff *nskb = NULL; + struct sk_buff *user_skb; /* to be queued to userspace */ + struct nlattr *nla; + unsigned int len; + int err; + + if (vlan_tx_tag_present(skb)) { + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb)); + if (!skb) + return -ENOMEM; + + nskb->vlan_tci = 0; + skb = nskb; + } + + if (nla_attr_size(skb->len) > USHRT_MAX) { + err = -EFBIG; + goto out; + } + + len = sizeof(struct ovs_header); + len += nla_total_size(skb->len); + len += nla_total_size(FLOW_BUFSIZE); + if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) + len += nla_total_size(8); + + user_skb = genlmsg_new(len, GFP_ATOMIC); + if (!user_skb) { + err = -ENOMEM; + goto out; + } + + upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, + 0, upcall_info->cmd); + upcall->dp_ifindex = dp_ifindex; + + nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); + ovs_flow_to_nlattrs(upcall_info->key, user_skb); + nla_nest_end(user_skb, nla); + + if (upcall_info->userdata) + nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, + nla_get_u64(upcall_info->userdata)); + + nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); + + skb_copy_and_csum_dev(skb, nla_data(nla)); + + err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid); + +out: + kfree_skb(nskb); + return err; +} + +/* Called with genl_mutex. */ +static int flush_flows(int dp_ifindex) +{ + struct flow_table *old_table; + struct flow_table *new_table; + struct datapath *dp; + + dp = get_dp(dp_ifindex); + if (!dp) + return -ENODEV; + + old_table = genl_dereference(dp->table); + new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); + if (!new_table) + return -ENOMEM; + + rcu_assign_pointer(dp->table, new_table); + + ovs_flow_tbl_deferred_destroy(old_table); + return 0; +} + +static int validate_actions(const struct nlattr *attr, + const struct sw_flow_key *key, int depth); + +static int validate_sample(const struct nlattr *attr, + const struct sw_flow_key *key, int depth) +{ + const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; + const struct nlattr *probability, *actions; + const struct nlattr *a; + int rem; + + memset(attrs, 0, sizeof(attrs)); + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) + return -EINVAL; + attrs[type] = a; + } + if (rem) + return -EINVAL; + + probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; + if (!probability || nla_len(probability) != sizeof(u32)) + return -EINVAL; + + actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; + if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) + return -EINVAL; + return validate_actions(actions, key, depth + 1); +} + +static int validate_set(const struct nlattr *a, + const struct sw_flow_key *flow_key) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + + /* There can be only one key in a action */ + if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) + return -EINVAL; + + if (key_type > OVS_KEY_ATTR_MAX || + nla_len(ovs_key) != ovs_key_lens[key_type]) + return -EINVAL; + + switch (key_type) { + const struct ovs_key_ipv4 *ipv4_key; + + case OVS_KEY_ATTR_PRIORITY: + case OVS_KEY_ATTR_ETHERNET: + break; + + case OVS_KEY_ATTR_IPV4: + if (flow_key->eth.type != htons(ETH_P_IP)) + return -EINVAL; + + if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst) + return -EINVAL; + + ipv4_key = nla_data(ovs_key); + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + + break; + + case OVS_KEY_ATTR_TCP: + if (flow_key->ip.proto != IPPROTO_TCP) + return -EINVAL; + + if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst) + return -EINVAL; + + break; + + case OVS_KEY_ATTR_UDP: + if (flow_key->ip.proto != IPPROTO_UDP) + return -EINVAL; + + if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst) + return -EINVAL; + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int validate_userspace(const struct nlattr *attr) +{ + static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { + [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, + }; + struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; + int error; + + error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, + attr, userspace_policy); + if (error) + return error; + + if (!a[OVS_USERSPACE_ATTR_PID] || + !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) + return -EINVAL; + + return 0; +} + +static int validate_actions(const struct nlattr *attr, + const struct sw_flow_key *key, int depth) +{ + const struct nlattr *a; + int rem, err; + + if (depth >= SAMPLE_ACTION_DEPTH) + return -EOVERFLOW; + + nla_for_each_nested(a, attr, rem) { + /* Expected argument lengths, (u32)-1 for variable length. */ + static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { + [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), + [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), + [OVS_ACTION_ATTR_POP_VLAN] = 0, + [OVS_ACTION_ATTR_SET] = (u32)-1, + [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 + }; + const struct ovs_action_push_vlan *vlan; + int type = nla_type(a); + + if (type > OVS_ACTION_ATTR_MAX || + (action_lens[type] != nla_len(a) && + action_lens[type] != (u32)-1)) + return -EINVAL; + + switch (type) { + case OVS_ACTION_ATTR_UNSPEC: + return -EINVAL; + + case OVS_ACTION_ATTR_USERSPACE: + err = validate_userspace(a); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_OUTPUT: + if (nla_get_u32(a) >= DP_MAX_PORTS) + return -EINVAL; + break; + + + case OVS_ACTION_ATTR_POP_VLAN: + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + vlan = nla_data(a); + if (vlan->vlan_tpid != htons(ETH_P_8021Q)) + return -EINVAL; + if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) + return -EINVAL; + break; + + case OVS_ACTION_ATTR_SET: + err = validate_set(a, key); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SAMPLE: + err = validate_sample(a, key, depth); + if (err) + return err; + break; + + default: + return -EINVAL; + } + } + + if (rem > 0) + return -EINVAL; + + return 0; +} + +static void clear_stats(struct sw_flow *flow) +{ + flow->used = 0; + flow->tcp_flags = 0; + flow->packet_count = 0; + flow->byte_count = 0; +} + +static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) +{ + struct ovs_header *ovs_header = info->userhdr; + struct nlattr **a = info->attrs; + struct sw_flow_actions *acts; + struct sk_buff *packet; + struct sw_flow *flow; + struct datapath *dp; + struct ethhdr *eth; + int len; + int err; + int key_len; + + err = -EINVAL; + if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || + !a[OVS_PACKET_ATTR_ACTIONS] || + nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN) + goto err; + + len = nla_len(a[OVS_PACKET_ATTR_PACKET]); + packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL); + err = -ENOMEM; + if (!packet) + goto err; + skb_reserve(packet, NET_IP_ALIGN); + + memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); + + skb_reset_mac_header(packet); + eth = eth_hdr(packet); + + /* Normally, setting the skb 'protocol' field would be handled by a + * call to eth_type_trans(), but it assumes there's a sending + * device, which we may not have. */ + if (ntohs(eth->h_proto) >= 1536) + packet->protocol = eth->h_proto; + else + packet->protocol = htons(ETH_P_802_2); + + /* Build an sw_flow for sending this packet. */ + flow = ovs_flow_alloc(); + err = PTR_ERR(flow); + if (IS_ERR(flow)) + goto err_kfree_skb; + + err = ovs_flow_extract(packet, -1, &flow->key, &key_len); + if (err) + goto err_flow_free; + + err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, + &flow->key.phy.in_port, + a[OVS_PACKET_ATTR_KEY]); + if (err) + goto err_flow_free; + + err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0); + if (err) + goto err_flow_free; + + flow->hash = ovs_flow_hash(&flow->key, key_len); + + acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]); + err = PTR_ERR(acts); + if (IS_ERR(acts)) + goto err_flow_free; + rcu_assign_pointer(flow->sf_acts, acts); + + OVS_CB(packet)->flow = flow; + packet->priority = flow->key.phy.priority; + + rcu_read_lock(); + dp = get_dp(ovs_header->dp_ifindex); + err = -ENODEV; + if (!dp) + goto err_unlock; + + local_bh_disable(); + err = ovs_execute_actions(dp, packet); + local_bh_enable(); + rcu_read_unlock(); + + ovs_flow_free(flow); + return err; + +err_unlock: + rcu_read_unlock(); +err_flow_free: + ovs_flow_free(flow); +err_kfree_skb: + kfree_skb(packet); +err: + return err; +} + +static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { + [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, + [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, +}; + +static struct genl_ops dp_packet_genl_ops[] = { + { .cmd = OVS_PACKET_CMD_EXECUTE, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = packet_policy, + .doit = ovs_packet_cmd_execute + } +}; + +static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) +{ + int i; + struct flow_table *table = genl_dereference(dp->table); + + stats->n_flows = ovs_flow_tbl_count(table); + + stats->n_hit = stats->n_missed = stats->n_lost = 0; + for_each_possible_cpu(i) { + const struct dp_stats_percpu *percpu_stats; + struct dp_stats_percpu local_stats; + unsigned int start; + + percpu_stats = per_cpu_ptr(dp->stats_percpu, i); + + do { + start = u64_stats_fetch_begin_bh(&percpu_stats->sync); + local_stats = *percpu_stats; + } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); + + stats->n_hit += local_stats.n_hit; + stats->n_missed += local_stats.n_missed; + stats->n_lost += local_stats.n_lost; + } +} + +static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { + [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, +}; + +static struct genl_family dp_flow_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_FLOW_FAMILY, + .version = OVS_FLOW_VERSION, + .maxattr = OVS_FLOW_ATTR_MAX +}; + +static struct genl_multicast_group ovs_dp_flow_multicast_group = { + .name = OVS_FLOW_MCGROUP +}; + +/* Called with genl_lock. */ +static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, + struct sk_buff *skb, u32 pid, + u32 seq, u32 flags, u8 cmd) +{ + const int skb_orig_len = skb->len; + const struct sw_flow_actions *sf_acts; + struct ovs_flow_stats stats; + struct ovs_header *ovs_header; + struct nlattr *nla; + unsigned long used; + u8 tcp_flags; + int err; + + sf_acts = rcu_dereference_protected(flow->sf_acts, + lockdep_genl_is_held()); + + ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd); + if (!ovs_header) + return -EMSGSIZE; + + ovs_header->dp_ifindex = get_dpifindex(dp); + + nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); + if (!nla) + goto nla_put_failure; + err = ovs_flow_to_nlattrs(&flow->key, skb); + if (err) + goto error; + nla_nest_end(skb, nla); + + spin_lock_bh(&flow->lock); + used = flow->used; + stats.n_packets = flow->packet_count; + stats.n_bytes = flow->byte_count; + tcp_flags = flow->tcp_flags; + spin_unlock_bh(&flow->lock); + + if (used) + NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)); + + if (stats.n_packets) + NLA_PUT(skb, OVS_FLOW_ATTR_STATS, + sizeof(struct ovs_flow_stats), &stats); + + if (tcp_flags) + NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags); + + /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if + * this is the first flow to be dumped into 'skb'. This is unusual for + * Netlink but individual action lists can be longer than + * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. + * The userspace caller can always fetch the actions separately if it + * really wants them. (Most userspace callers in fact don't care.) + * + * This can only fail for dump operations because the skb is always + * properly sized for single flows. + */ + err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, + sf_acts->actions); + if (err < 0 && skb_orig_len) + goto error; + + return genlmsg_end(skb, ovs_header); + +nla_put_failure: + err = -EMSGSIZE; +error: + genlmsg_cancel(skb, ovs_header); + return err; +} + +static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) +{ + const struct sw_flow_actions *sf_acts; + int len; + + sf_acts = rcu_dereference_protected(flow->sf_acts, + lockdep_genl_is_held()); + + /* OVS_FLOW_ATTR_KEY */ + len = nla_total_size(FLOW_BUFSIZE); + /* OVS_FLOW_ATTR_ACTIONS */ + len += nla_total_size(sf_acts->actions_len); + /* OVS_FLOW_ATTR_STATS */ + len += nla_total_size(sizeof(struct ovs_flow_stats)); + /* OVS_FLOW_ATTR_TCP_FLAGS */ + len += nla_total_size(1); + /* OVS_FLOW_ATTR_USED */ + len += nla_total_size(8); + + len += NLMSG_ALIGN(sizeof(struct ovs_header)); + + return genlmsg_new(len, GFP_KERNEL); +} + +static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, + struct datapath *dp, + u32 pid, u32 seq, u8 cmd) +{ + struct sk_buff *skb; + int retval; + + skb = ovs_flow_cmd_alloc_info(flow); + if (!skb) + return ERR_PTR(-ENOMEM); + + retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd); + BUG_ON(retval < 0); + return skb; +} + +static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct sw_flow_key key; + struct sw_flow *flow; + struct sk_buff *reply; + struct datapath *dp; + struct flow_table *table; + int error; + int key_len; + + /* Extract key. */ + error = -EINVAL; + if (!a[OVS_FLOW_ATTR_KEY]) + goto error; + error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); + if (error) + goto error; + + /* Validate actions. */ + if (a[OVS_FLOW_ATTR_ACTIONS]) { + error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); + if (error) + goto error; + } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { + error = -EINVAL; + goto error; + } + + dp = get_dp(ovs_header->dp_ifindex); + error = -ENODEV; + if (!dp) + goto error; + + table = genl_dereference(dp->table); + flow = ovs_flow_tbl_lookup(table, &key, key_len); + if (!flow) { + struct sw_flow_actions *acts; + + /* Bail out if we're not allowed to create a new flow. */ + error = -ENOENT; + if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) + goto error; + + /* Expand table, if necessary, to make room. */ + if (ovs_flow_tbl_need_to_expand(table)) { + struct flow_table *new_table; + + new_table = ovs_flow_tbl_expand(table); + if (!IS_ERR(new_table)) { + rcu_assign_pointer(dp->table, new_table); + ovs_flow_tbl_deferred_destroy(table); + table = genl_dereference(dp->table); + } + } + + /* Allocate flow. */ + flow = ovs_flow_alloc(); + if (IS_ERR(flow)) { + error = PTR_ERR(flow); + goto error; + } + flow->key = key; + clear_stats(flow); + + /* Obtain actions. */ + acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); + error = PTR_ERR(acts); + if (IS_ERR(acts)) + goto error_free_flow; + rcu_assign_pointer(flow->sf_acts, acts); + + /* Put flow in bucket. */ + flow->hash = ovs_flow_hash(&key, key_len); + ovs_flow_tbl_insert(table, flow); + + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + info->snd_seq, + OVS_FLOW_CMD_NEW); + } else { + /* We found a matching flow. */ + struct sw_flow_actions *old_acts; + struct nlattr *acts_attrs; + + /* Bail out if we're not allowed to modify an existing flow. + * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL + * because Generic Netlink treats the latter as a dump + * request. We also accept NLM_F_EXCL in case that bug ever + * gets fixed. + */ + error = -EEXIST; + if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && + info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + goto error; + + /* Update actions. */ + old_acts = rcu_dereference_protected(flow->sf_acts, + lockdep_genl_is_held()); + acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; + if (acts_attrs && + (old_acts->actions_len != nla_len(acts_attrs) || + memcmp(old_acts->actions, nla_data(acts_attrs), + old_acts->actions_len))) { + struct sw_flow_actions *new_acts; + + new_acts = ovs_flow_actions_alloc(acts_attrs); + error = PTR_ERR(new_acts); + if (IS_ERR(new_acts)) + goto error; + + rcu_assign_pointer(flow->sf_acts, new_acts); + ovs_flow_deferred_free_acts(old_acts); + } + + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + info->snd_seq, OVS_FLOW_CMD_NEW); + + /* Clear stats. */ + if (a[OVS_FLOW_ATTR_CLEAR]) { + spin_lock_bh(&flow->lock); + clear_stats(flow); + spin_unlock_bh(&flow->lock); + } + } + + if (!IS_ERR(reply)) + genl_notify(reply, genl_info_net(info), info->snd_pid, + ovs_dp_flow_multicast_group.id, info->nlhdr, + GFP_KERNEL); + else + netlink_set_err(init_net.genl_sock, 0, + ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); + return 0; + +error_free_flow: + ovs_flow_free(flow); +error: + return error; +} + +static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct sw_flow_key key; + struct sk_buff *reply; + struct sw_flow *flow; + struct datapath *dp; + struct flow_table *table; + int err; + int key_len; + + if (!a[OVS_FLOW_ATTR_KEY]) + return -EINVAL; + err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); + if (err) + return err; + + dp = get_dp(ovs_header->dp_ifindex); + if (!dp) + return -ENODEV; + + table = genl_dereference(dp->table); + flow = ovs_flow_tbl_lookup(table, &key, key_len); + if (!flow) + return -ENOENT; + + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + info->snd_seq, OVS_FLOW_CMD_NEW); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + return genlmsg_reply(reply, info); +} + +static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct sw_flow_key key; + struct sk_buff *reply; + struct sw_flow *flow; + struct datapath *dp; + struct flow_table *table; + int err; + int key_len; + + if (!a[OVS_FLOW_ATTR_KEY]) + return flush_flows(ovs_header->dp_ifindex); + err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); + if (err) + return err; + + dp = get_dp(ovs_header->dp_ifindex); + if (!dp) + return -ENODEV; + + table = genl_dereference(dp->table); + flow = ovs_flow_tbl_lookup(table, &key, key_len); + if (!flow) + return -ENOENT; + + reply = ovs_flow_cmd_alloc_info(flow); + if (!reply) + return -ENOMEM; + + ovs_flow_tbl_remove(table, flow); + + err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid, + info->snd_seq, 0, OVS_FLOW_CMD_DEL); + BUG_ON(err < 0); + + ovs_flow_deferred_free(flow); + + genl_notify(reply, genl_info_net(info), info->snd_pid, + ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); + return 0; +} + +static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); + struct datapath *dp; + struct flow_table *table; + + dp = get_dp(ovs_header->dp_ifindex); + if (!dp) + return -ENODEV; + + table = genl_dereference(dp->table); + + for (;;) { + struct sw_flow *flow; + u32 bucket, obj; + + bucket = cb->args[0]; + obj = cb->args[1]; + flow = ovs_flow_tbl_next(table, &bucket, &obj); + if (!flow) + break; + + if (ovs_flow_cmd_fill_info(flow, dp, skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + OVS_FLOW_CMD_NEW) < 0) + break; + + cb->args[0] = bucket; + cb->args[1] = obj; + } + return skb->len; +} + +static struct genl_ops dp_flow_genl_ops[] = { + { .cmd = OVS_FLOW_CMD_NEW, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = flow_policy, + .doit = ovs_flow_cmd_new_or_set + }, + { .cmd = OVS_FLOW_CMD_DEL, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = flow_policy, + .doit = ovs_flow_cmd_del + }, + { .cmd = OVS_FLOW_CMD_GET, + .flags = 0, /* OK for unprivileged users. */ + .policy = flow_policy, + .doit = ovs_flow_cmd_get, + .dumpit = ovs_flow_cmd_dump + }, + { .cmd = OVS_FLOW_CMD_SET, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = flow_policy, + .doit = ovs_flow_cmd_new_or_set, + }, +}; + +static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { + [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, +}; + +static struct genl_family dp_datapath_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_DATAPATH_FAMILY, + .version = OVS_DATAPATH_VERSION, + .maxattr = OVS_DP_ATTR_MAX +}; + +static struct genl_multicast_group ovs_dp_datapath_multicast_group = { + .name = OVS_DATAPATH_MCGROUP +}; + +static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, + u32 pid, u32 seq, u32 flags, u8 cmd) +{ + struct ovs_header *ovs_header; + struct ovs_dp_stats dp_stats; + int err; + + ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family, + flags, cmd); + if (!ovs_header) + goto error; + + ovs_header->dp_ifindex = get_dpifindex(dp); + + rcu_read_lock(); + err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); + rcu_read_unlock(); + if (err) + goto nla_put_failure; + + get_dp_stats(dp, &dp_stats); + NLA_PUT(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats); + + return genlmsg_end(skb, ovs_header); + +nla_put_failure: + genlmsg_cancel(skb, ovs_header); +error: + return -EMSGSIZE; +} + +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, + u32 seq, u8 cmd) +{ + struct sk_buff *skb; + int retval; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOMEM); + + retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd); + if (retval < 0) { + kfree_skb(skb); + return ERR_PTR(retval); + } + return skb; +} + +/* Called with genl_mutex and optionally with RTNL lock also. */ +static struct datapath *lookup_datapath(struct ovs_header *ovs_header, + struct nlattr *a[OVS_DP_ATTR_MAX + 1]) +{ + struct datapath *dp; + + if (!a[OVS_DP_ATTR_NAME]) + dp = get_dp(ovs_header->dp_ifindex); + else { + struct vport *vport; + + rcu_read_lock(); + vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME])); + dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; + rcu_read_unlock(); + } + return dp ? dp : ERR_PTR(-ENODEV); +} + +static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct vport_parms parms; + struct sk_buff *reply; + struct datapath *dp; + struct vport *vport; + int err; + + err = -EINVAL; + if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) + goto err; + + rtnl_lock(); + err = -ENODEV; + if (!try_module_get(THIS_MODULE)) + goto err_unlock_rtnl; + + err = -ENOMEM; + dp = kzalloc(sizeof(*dp), GFP_KERNEL); + if (dp == NULL) + goto err_put_module; + INIT_LIST_HEAD(&dp->port_list); + + /* Allocate table. */ + err = -ENOMEM; + rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); + if (!dp->table) + goto err_free_dp; + + dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); + if (!dp->stats_percpu) { + err = -ENOMEM; + goto err_destroy_table; + } + + /* Set up our datapath device. */ + parms.name = nla_data(a[OVS_DP_ATTR_NAME]); + parms.type = OVS_VPORT_TYPE_INTERNAL; + parms.options = NULL; + parms.dp = dp; + parms.port_no = OVSP_LOCAL; + parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + + vport = new_vport(&parms); + if (IS_ERR(vport)) { + err = PTR_ERR(vport); + if (err == -EBUSY) + err = -EEXIST; + + goto err_destroy_percpu; + } + + reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + info->snd_seq, OVS_DP_CMD_NEW); + err = PTR_ERR(reply); + if (IS_ERR(reply)) + goto err_destroy_local_port; + + list_add_tail(&dp->list_node, &dps); + rtnl_unlock(); + + genl_notify(reply, genl_info_net(info), info->snd_pid, + ovs_dp_datapath_multicast_group.id, info->nlhdr, + GFP_KERNEL); + return 0; + +err_destroy_local_port: + ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); +err_destroy_percpu: + free_percpu(dp->stats_percpu); +err_destroy_table: + ovs_flow_tbl_destroy(genl_dereference(dp->table)); +err_free_dp: + kfree(dp); +err_put_module: + module_put(THIS_MODULE); +err_unlock_rtnl: + rtnl_unlock(); +err: + return err; +} + +static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct vport *vport, *next_vport; + struct sk_buff *reply; + struct datapath *dp; + int err; + + rtnl_lock(); + dp = lookup_datapath(info->userhdr, info->attrs); + err = PTR_ERR(dp); + if (IS_ERR(dp)) + goto exit_unlock; + + reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + info->snd_seq, OVS_DP_CMD_DEL); + err = PTR_ERR(reply); + if (IS_ERR(reply)) + goto exit_unlock; + + list_for_each_entry_safe(vport, next_vport, &dp->port_list, node) + if (vport->port_no != OVSP_LOCAL) + ovs_dp_detach_port(vport); + + list_del(&dp->list_node); + ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); + + /* rtnl_unlock() will wait until all the references to devices that + * are pending unregistration have been dropped. We do it here to + * ensure that any internal devices (which contain DP pointers) are + * fully destroyed before freeing the datapath. + */ + rtnl_unlock(); + + call_rcu(&dp->rcu, destroy_dp_rcu); + module_put(THIS_MODULE); + + genl_notify(reply, genl_info_net(info), info->snd_pid, + ovs_dp_datapath_multicast_group.id, info->nlhdr, + GFP_KERNEL); + + return 0; + +exit_unlock: + rtnl_unlock(); + return err; +} + +static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *reply; + struct datapath *dp; + int err; + + dp = lookup_datapath(info->userhdr, info->attrs); + if (IS_ERR(dp)) + return PTR_ERR(dp); + + reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + info->snd_seq, OVS_DP_CMD_NEW); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + netlink_set_err(init_net.genl_sock, 0, + ovs_dp_datapath_multicast_group.id, err); + return 0; + } + + genl_notify(reply, genl_info_net(info), info->snd_pid, + ovs_dp_datapath_multicast_group.id, info->nlhdr, + GFP_KERNEL); + + return 0; +} + +static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *reply; + struct datapath *dp; + + dp = lookup_datapath(info->userhdr, info->attrs); + if (IS_ERR(dp)) + return PTR_ERR(dp); + + reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + info->snd_seq, OVS_DP_CMD_NEW); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + return genlmsg_reply(reply, info); +} + +static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct datapath *dp; + int skip = cb->args[0]; + int i = 0; + + list_for_each_entry(dp, &dps, list_node) { + if (i < skip) + continue; + if (ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + OVS_DP_CMD_NEW) < 0) + break; + i++; + } + + cb->args[0] = i; + + return skb->len; +} + +static struct genl_ops dp_datapath_genl_ops[] = { + { .cmd = OVS_DP_CMD_NEW, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = datapath_policy, + .doit = ovs_dp_cmd_new + }, + { .cmd = OVS_DP_CMD_DEL, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = datapath_policy, + .doit = ovs_dp_cmd_del + }, + { .cmd = OVS_DP_CMD_GET, + .flags = 0, /* OK for unprivileged users. */ + .policy = datapath_policy, + .doit = ovs_dp_cmd_get, + .dumpit = ovs_dp_cmd_dump + }, + { .cmd = OVS_DP_CMD_SET, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = datapath_policy, + .doit = ovs_dp_cmd_set, + }, +}; + +static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { + [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, + [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, +}; + +static struct genl_family dp_vport_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_VPORT_FAMILY, + .version = OVS_VPORT_VERSION, + .maxattr = OVS_VPORT_ATTR_MAX +}; + +struct genl_multicast_group ovs_dp_vport_multicast_group = { + .name = OVS_VPORT_MCGROUP +}; + +/* Called with RTNL lock or RCU read lock. */ +static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, + u32 pid, u32 seq, u32 flags, u8 cmd) +{ + struct ovs_header *ovs_header; + struct ovs_vport_stats vport_stats; + int err; + + ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family, + flags, cmd); + if (!ovs_header) + return -EMSGSIZE; + + ovs_header->dp_ifindex = get_dpifindex(vport->dp); + + NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no); + NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type); + NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)); + NLA_PUT_U32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid); + + ovs_vport_get_stats(vport, &vport_stats); + NLA_PUT(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), + &vport_stats); + + err = ovs_vport_get_options(vport, skb); + if (err == -EMSGSIZE) + goto error; + + return genlmsg_end(skb, ovs_header); + +nla_put_failure: + err = -EMSGSIZE; +error: + genlmsg_cancel(skb, ovs_header); + return err; +} + +/* Called with RTNL lock or RCU read lock. */ +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, + u32 seq, u8 cmd) +{ + struct sk_buff *skb; + int retval; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + + retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd); + if (retval < 0) { + kfree_skb(skb); + return ERR_PTR(retval); + } + return skb; +} + +/* Called with RTNL lock or RCU read lock. */ +static struct vport *lookup_vport(struct ovs_header *ovs_header, + struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) +{ + struct datapath *dp; + struct vport *vport; + + if (a[OVS_VPORT_ATTR_NAME]) { + vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME])); + if (!vport) + return ERR_PTR(-ENODEV); + return vport; + } else if (a[OVS_VPORT_ATTR_PORT_NO]) { + u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); + + if (port_no >= DP_MAX_PORTS) + return ERR_PTR(-EFBIG); + + dp = get_dp(ovs_header->dp_ifindex); + if (!dp) + return ERR_PTR(-ENODEV); + + vport = rcu_dereference_rtnl(dp->ports[port_no]); + if (!vport) + return ERR_PTR(-ENOENT); + return vport; + } else + return ERR_PTR(-EINVAL); +} + +static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct vport_parms parms; + struct sk_buff *reply; + struct vport *vport; + struct datapath *dp; + u32 port_no; + int err; + + err = -EINVAL; + if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || + !a[OVS_VPORT_ATTR_UPCALL_PID]) + goto exit; + + rtnl_lock(); + dp = get_dp(ovs_header->dp_ifindex); + err = -ENODEV; + if (!dp) + goto exit_unlock; + + if (a[OVS_VPORT_ATTR_PORT_NO]) { + port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); + + err = -EFBIG; + if (port_no >= DP_MAX_PORTS) + goto exit_unlock; + + vport = rtnl_dereference(dp->ports[port_no]); + err = -EBUSY; + if (vport) + goto exit_unlock; + } else { + for (port_no = 1; ; port_no++) { + if (port_no >= DP_MAX_PORTS) { + err = -EFBIG; + goto exit_unlock; + } + vport = rtnl_dereference(dp->ports[port_no]); + if (!vport) + break; + } + } + + parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]); + parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]); + parms.options = a[OVS_VPORT_ATTR_OPTIONS]; + parms.dp = dp; + parms.port_no = port_no; + parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + + vport = new_vport(&parms); + err = PTR_ERR(vport); + if (IS_ERR(vport)) + goto exit_unlock; + + reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + OVS_VPORT_CMD_NEW); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + ovs_dp_detach_port(vport); + goto exit_unlock; + } + genl_notify(reply, genl_info_net(info), info->snd_pid, + ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + +exit_unlock: + rtnl_unlock(); +exit: + return err; +} + +static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct sk_buff *reply; + struct vport *vport; + int err; + + rtnl_lock(); + vport = lookup_vport(info->userhdr, a); + err = PTR_ERR(vport); + if (IS_ERR(vport)) + goto exit_unlock; + + err = 0; + if (a[OVS_VPORT_ATTR_TYPE] && + nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) + err = -EINVAL; + + if (!err && a[OVS_VPORT_ATTR_OPTIONS]) + err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); + if (!err && a[OVS_VPORT_ATTR_UPCALL_PID]) + vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + + reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + OVS_VPORT_CMD_NEW); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + netlink_set_err(init_net.genl_sock, 0, + ovs_dp_vport_multicast_group.id, err); + return 0; + } + + genl_notify(reply, genl_info_net(info), info->snd_pid, + ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + +exit_unlock: + rtnl_unlock(); + return err; +} + +static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct sk_buff *reply; + struct vport *vport; + int err; + + rtnl_lock(); + vport = lookup_vport(info->userhdr, a); + err = PTR_ERR(vport); + if (IS_ERR(vport)) + goto exit_unlock; + + if (vport->port_no == OVSP_LOCAL) { + err = -EINVAL; + goto exit_unlock; + } + + reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + OVS_VPORT_CMD_DEL); + err = PTR_ERR(reply); + if (IS_ERR(reply)) + goto exit_unlock; + + ovs_dp_detach_port(vport); + + genl_notify(reply, genl_info_net(info), info->snd_pid, + ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + +exit_unlock: + rtnl_unlock(); + return err; +} + +static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct sk_buff *reply; + struct vport *vport; + int err; + + rcu_read_lock(); + vport = lookup_vport(ovs_header, a); + err = PTR_ERR(vport); + if (IS_ERR(vport)) + goto exit_unlock; + + reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + OVS_VPORT_CMD_NEW); + err = PTR_ERR(reply); + if (IS_ERR(reply)) + goto exit_unlock; + + rcu_read_unlock(); + + return genlmsg_reply(reply, info); + +exit_unlock: + rcu_read_unlock(); + return err; +} + +static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); + struct datapath *dp; + u32 port_no; + int retval; + + dp = get_dp(ovs_header->dp_ifindex); + if (!dp) + return -ENODEV; + + rcu_read_lock(); + for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) { + struct vport *vport; + + vport = rcu_dereference(dp->ports[port_no]); + if (!vport) + continue; + + if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + OVS_VPORT_CMD_NEW) < 0) + break; + } + rcu_read_unlock(); + + cb->args[0] = port_no; + retval = skb->len; + + return retval; +} + +static void rehash_flow_table(struct work_struct *work) +{ + struct datapath *dp; + + genl_lock(); + + list_for_each_entry(dp, &dps, list_node) { + struct flow_table *old_table = genl_dereference(dp->table); + struct flow_table *new_table; + + new_table = ovs_flow_tbl_rehash(old_table); + if (!IS_ERR(new_table)) { + rcu_assign_pointer(dp->table, new_table); + ovs_flow_tbl_deferred_destroy(old_table); + } + } + + genl_unlock(); + + schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); +} + +static struct genl_ops dp_vport_genl_ops[] = { + { .cmd = OVS_VPORT_CMD_NEW, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = vport_policy, + .doit = ovs_vport_cmd_new + }, + { .cmd = OVS_VPORT_CMD_DEL, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = vport_policy, + .doit = ovs_vport_cmd_del + }, + { .cmd = OVS_VPORT_CMD_GET, + .flags = 0, /* OK for unprivileged users. */ + .policy = vport_policy, + .doit = ovs_vport_cmd_get, + .dumpit = ovs_vport_cmd_dump + }, + { .cmd = OVS_VPORT_CMD_SET, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = vport_policy, + .doit = ovs_vport_cmd_set, + }, +}; + +struct genl_family_and_ops { + struct genl_family *family; + struct genl_ops *ops; + int n_ops; + struct genl_multicast_group *group; +}; + +static const struct genl_family_and_ops dp_genl_families[] = { + { &dp_datapath_genl_family, + dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), + &ovs_dp_datapath_multicast_group }, + { &dp_vport_genl_family, + dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), + &ovs_dp_vport_multicast_group }, + { &dp_flow_genl_family, + dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), + &ovs_dp_flow_multicast_group }, + { &dp_packet_genl_family, + dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), + NULL }, +}; + +static void dp_unregister_genl(int n_families) +{ + int i; + + for (i = 0; i < n_families; i++) + genl_unregister_family(dp_genl_families[i].family); +} + +static int dp_register_genl(void) +{ + int n_registered; + int err; + int i; + + n_registered = 0; + for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { + const struct genl_family_and_ops *f = &dp_genl_families[i]; + + err = genl_register_family_with_ops(f->family, f->ops, + f->n_ops); + if (err) + goto error; + n_registered++; + + if (f->group) { + err = genl_register_mc_group(f->family, f->group); + if (err) + goto error; + } + } + + return 0; + +error: + dp_unregister_genl(n_registered); + return err; +} + +static int __init dp_init(void) +{ + struct sk_buff *dummy_skb; + int err; + + BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb)); + + pr_info("Open vSwitch switching datapath\n"); + + err = ovs_flow_init(); + if (err) + goto error; + + err = ovs_vport_init(); + if (err) + goto error_flow_exit; + + err = register_netdevice_notifier(&ovs_dp_device_notifier); + if (err) + goto error_vport_exit; + + err = dp_register_genl(); + if (err < 0) + goto error_unreg_notifier; + + schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); + + return 0; + +error_unreg_notifier: + unregister_netdevice_notifier(&ovs_dp_device_notifier); +error_vport_exit: + ovs_vport_exit(); +error_flow_exit: + ovs_flow_exit(); +error: + return err; +} + +static void dp_cleanup(void) +{ + cancel_delayed_work_sync(&rehash_flow_wq); + rcu_barrier(); + dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); + unregister_netdevice_notifier(&ovs_dp_device_notifier); + ovs_vport_exit(); + ovs_flow_exit(); +} + +module_init(dp_init); +module_exit(dp_cleanup); + +MODULE_DESCRIPTION("Open vSwitch switching datapath"); +MODULE_LICENSE("GPL"); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h new file mode 100644 index 000000000000..5b9f884b7055 --- /dev/null +++ b/net/openvswitch/datapath.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef DATAPATH_H +#define DATAPATH_H 1 + +#include <asm/page.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/u64_stats_sync.h> +#include <linux/version.h> + +#include "flow.h" + +struct vport; + +#define DP_MAX_PORTS 1024 +#define SAMPLE_ACTION_DEPTH 3 + +/** + * struct dp_stats_percpu - per-cpu packet processing statistics for a given + * datapath. + * @n_hit: Number of received packets for which a matching flow was found in + * the flow table. + * @n_miss: Number of received packets that had no matching flow in the flow + * table. The sum of @n_hit and @n_miss is the number of packets that have + * been received by the datapath. + * @n_lost: Number of received packets that had no matching flow in the flow + * table that could not be sent to userspace (normally due to an overflow in + * one of the datapath's queues). + */ +struct dp_stats_percpu { + u64 n_hit; + u64 n_missed; + u64 n_lost; + struct u64_stats_sync sync; +}; + +/** + * struct datapath - datapath for flow-based packet switching + * @rcu: RCU callback head for deferred destruction. + * @list_node: Element in global 'dps' list. + * @n_flows: Number of flows currently in flow table. + * @table: Current flow table. Protected by genl_lock and RCU. + * @ports: Map from port number to &struct vport. %OVSP_LOCAL port + * always exists, other ports may be %NULL. Protected by RTNL and RCU. + * @port_list: List of all ports in @ports in arbitrary order. RTNL required + * to iterate or modify. + * @stats_percpu: Per-CPU datapath statistics. + * + * Context: See the comment on locking at the top of datapath.c for additional + * locking information. + */ +struct datapath { + struct rcu_head rcu; + struct list_head list_node; + + /* Flow table. */ + struct flow_table __rcu *table; + + /* Switch ports. */ + struct vport __rcu *ports[DP_MAX_PORTS]; + struct list_head port_list; + + /* Stats. */ + struct dp_stats_percpu __percpu *stats_percpu; +}; + +/** + * struct ovs_skb_cb - OVS data in skb CB + * @flow: The flow associated with this packet. May be %NULL if no flow. + */ +struct ovs_skb_cb { + struct sw_flow *flow; +}; +#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) + +/** + * struct dp_upcall - metadata to include with a packet to send to userspace + * @cmd: One of %OVS_PACKET_CMD_*. + * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. + * @userdata: If nonnull, its u64 value is extracted and passed to userspace as + * %OVS_PACKET_ATTR_USERDATA. + * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no + * packet is sent and the packet is accounted in the datapath's @n_lost + * counter. + */ +struct dp_upcall_info { + u8 cmd; + const struct sw_flow_key *key; + const struct nlattr *userdata; + u32 pid; +}; + +extern struct notifier_block ovs_dp_device_notifier; +extern struct genl_multicast_group ovs_dp_vport_multicast_group; + +void ovs_dp_process_received_packet(struct vport *, struct sk_buff *); +void ovs_dp_detach_port(struct vport *); +int ovs_dp_upcall(struct datapath *, struct sk_buff *, + const struct dp_upcall_info *); + +const char *ovs_dp_name(const struct datapath *dp); +struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, + u8 cmd); + +int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); +#endif /* datapath.h */ diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c new file mode 100644 index 000000000000..46736518c453 --- /dev/null +++ b/net/openvswitch/dp_notify.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include <linux/netdevice.h> +#include <net/genetlink.h> + +#include "datapath.h" +#include "vport-internal_dev.h" +#include "vport-netdev.h" + +static int dp_device_event(struct notifier_block *unused, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct vport *vport; + + if (ovs_is_internal_dev(dev)) + vport = ovs_internal_dev_get_vport(dev); + else + vport = ovs_netdev_get_vport(dev); + + if (!vport) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UNREGISTER: + if (!ovs_is_internal_dev(dev)) { + struct sk_buff *notify; + + notify = ovs_vport_cmd_build_info(vport, 0, 0, + OVS_VPORT_CMD_DEL); + ovs_dp_detach_port(vport); + if (IS_ERR(notify)) { + netlink_set_err(init_net.genl_sock, 0, + ovs_dp_vport_multicast_group.id, + PTR_ERR(notify)); + break; + } + + genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id, + GFP_KERNEL); + } + break; + } + + return NOTIFY_DONE; +} + +struct notifier_block ovs_dp_device_notifier = { + .notifier_call = dp_device_event +}; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c new file mode 100644 index 000000000000..fe7f020a843e --- /dev/null +++ b/net/openvswitch/flow.c @@ -0,0 +1,1346 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include "flow.h" +#include "datapath.h" +#include <linux/uaccess.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/llc_pdu.h> +#include <linux/kernel.h> +#include <linux/jhash.h> +#include <linux/jiffies.h> +#include <linux/llc.h> +#include <linux/module.h> +#include <linux/in.h> +#include <linux/rcupdate.h> +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/rculist.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/ndisc.h> + +static struct kmem_cache *flow_cache; + +static int check_header(struct sk_buff *skb, int len) +{ + if (unlikely(skb->len < len)) + return -EINVAL; + if (unlikely(!pskb_may_pull(skb, len))) + return -ENOMEM; + return 0; +} + +static bool arphdr_ok(struct sk_buff *skb) +{ + return pskb_may_pull(skb, skb_network_offset(skb) + + sizeof(struct arp_eth_header)); +} + +static int check_iphdr(struct sk_buff *skb) +{ + unsigned int nh_ofs = skb_network_offset(skb); + unsigned int ip_len; + int err; + + err = check_header(skb, nh_ofs + sizeof(struct iphdr)); + if (unlikely(err)) + return err; + + ip_len = ip_hdrlen(skb); + if (unlikely(ip_len < sizeof(struct iphdr) || + skb->len < nh_ofs + ip_len)) + return -EINVAL; + + skb_set_transport_header(skb, nh_ofs + ip_len); + return 0; +} + +static bool tcphdr_ok(struct sk_buff *skb) +{ + int th_ofs = skb_transport_offset(skb); + int tcp_len; + + if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr)))) + return false; + + tcp_len = tcp_hdrlen(skb); + if (unlikely(tcp_len < sizeof(struct tcphdr) || + skb->len < th_ofs + tcp_len)) + return false; + + return true; +} + +static bool udphdr_ok(struct sk_buff *skb) +{ + return pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct udphdr)); +} + +static bool icmphdr_ok(struct sk_buff *skb) +{ + return pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct icmphdr)); +} + +u64 ovs_flow_used_time(unsigned long flow_jiffies) +{ + struct timespec cur_ts; + u64 cur_ms, idle_ms; + + ktime_get_ts(&cur_ts); + idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); + cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + + cur_ts.tv_nsec / NSEC_PER_MSEC; + + return cur_ms - idle_ms; +} + +#define SW_FLOW_KEY_OFFSET(field) \ + (offsetof(struct sw_flow_key, field) + \ + FIELD_SIZEOF(struct sw_flow_key, field)) + +static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, + int *key_lenp) +{ + unsigned int nh_ofs = skb_network_offset(skb); + unsigned int nh_len; + int payload_ofs; + struct ipv6hdr *nh; + uint8_t nexthdr; + __be16 frag_off; + int err; + + *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label); + + err = check_header(skb, nh_ofs + sizeof(*nh)); + if (unlikely(err)) + return err; + + nh = ipv6_hdr(skb); + nexthdr = nh->nexthdr; + payload_ofs = (u8 *)(nh + 1) - skb->data; + + key->ip.proto = NEXTHDR_NONE; + key->ip.tos = ipv6_get_dsfield(nh); + key->ip.ttl = nh->hop_limit; + key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); + key->ipv6.addr.src = nh->saddr; + key->ipv6.addr.dst = nh->daddr; + + payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off); + if (unlikely(payload_ofs < 0)) + return -EINVAL; + + if (frag_off) { + if (frag_off & htons(~0x7)) + key->ip.frag = OVS_FRAG_TYPE_LATER; + else + key->ip.frag = OVS_FRAG_TYPE_FIRST; + } + + nh_len = payload_ofs - nh_ofs; + skb_set_transport_header(skb, nh_ofs + nh_len); + key->ip.proto = nexthdr; + return nh_len; +} + +static bool icmp6hdr_ok(struct sk_buff *skb) +{ + return pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct icmp6hdr)); +} + +#define TCP_FLAGS_OFFSET 13 +#define TCP_FLAG_MASK 0x3f + +void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) +{ + u8 tcp_flags = 0; + + if (flow->key.eth.type == htons(ETH_P_IP) && + flow->key.ip.proto == IPPROTO_TCP) { + u8 *tcp = (u8 *)tcp_hdr(skb); + tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; + } + + spin_lock(&flow->lock); + flow->used = jiffies; + flow->packet_count++; + flow->byte_count += skb->len; + flow->tcp_flags |= tcp_flags; + spin_unlock(&flow->lock); +} + +struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) +{ + int actions_len = nla_len(actions); + struct sw_flow_actions *sfa; + + /* At least DP_MAX_PORTS actions are required to be able to flood a + * packet to every port. Factor of 2 allows for setting VLAN tags, + * etc. */ + if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4)) + return ERR_PTR(-EINVAL); + + sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); + if (!sfa) + return ERR_PTR(-ENOMEM); + + sfa->actions_len = actions_len; + memcpy(sfa->actions, nla_data(actions), actions_len); + return sfa; +} + +struct sw_flow *ovs_flow_alloc(void) +{ + struct sw_flow *flow; + + flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&flow->lock); + flow->sf_acts = NULL; + + return flow; +} + +static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) +{ + hash = jhash_1word(hash, table->hash_seed); + return flex_array_get(table->buckets, + (hash & (table->n_buckets - 1))); +} + +static struct flex_array *alloc_buckets(unsigned int n_buckets) +{ + struct flex_array *buckets; + int i, err; + + buckets = flex_array_alloc(sizeof(struct hlist_head *), + n_buckets, GFP_KERNEL); + if (!buckets) + return NULL; + + err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); + if (err) { + flex_array_free(buckets); + return NULL; + } + + for (i = 0; i < n_buckets; i++) + INIT_HLIST_HEAD((struct hlist_head *) + flex_array_get(buckets, i)); + + return buckets; +} + +static void free_buckets(struct flex_array *buckets) +{ + flex_array_free(buckets); +} + +struct flow_table *ovs_flow_tbl_alloc(int new_size) +{ + struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + table->buckets = alloc_buckets(new_size); + + if (!table->buckets) { + kfree(table); + return NULL; + } + table->n_buckets = new_size; + table->count = 0; + table->node_ver = 0; + table->keep_flows = false; + get_random_bytes(&table->hash_seed, sizeof(u32)); + + return table; +} + +void ovs_flow_tbl_destroy(struct flow_table *table) +{ + int i; + + if (!table) + return; + + if (table->keep_flows) + goto skip_flows; + + for (i = 0; i < table->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head = flex_array_get(table->buckets, i); + struct hlist_node *node, *n; + int ver = table->node_ver; + + hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) { + hlist_del_rcu(&flow->hash_node[ver]); + ovs_flow_free(flow); + } + } + +skip_flows: + free_buckets(table->buckets); + kfree(table); +} + +static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) +{ + struct flow_table *table = container_of(rcu, struct flow_table, rcu); + + ovs_flow_tbl_destroy(table); +} + +void ovs_flow_tbl_deferred_destroy(struct flow_table *table) +{ + if (!table) + return; + + call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); +} + +struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last) +{ + struct sw_flow *flow; + struct hlist_head *head; + struct hlist_node *n; + int ver; + int i; + + ver = table->node_ver; + while (*bucket < table->n_buckets) { + i = 0; + head = flex_array_get(table->buckets, *bucket); + hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) { + if (i < *last) { + i++; + continue; + } + *last = i + 1; + return flow; + } + (*bucket)++; + *last = 0; + } + + return NULL; +} + +static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) +{ + int old_ver; + int i; + + old_ver = old->node_ver; + new->node_ver = !old_ver; + + /* Insert in new table. */ + for (i = 0; i < old->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head; + struct hlist_node *n; + + head = flex_array_get(old->buckets, i); + + hlist_for_each_entry(flow, n, head, hash_node[old_ver]) + ovs_flow_tbl_insert(new, flow); + } + old->keep_flows = true; +} + +static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets) +{ + struct flow_table *new_table; + + new_table = ovs_flow_tbl_alloc(n_buckets); + if (!new_table) + return ERR_PTR(-ENOMEM); + + flow_table_copy_flows(table, new_table); + + return new_table; +} + +struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) +{ + return __flow_tbl_rehash(table, table->n_buckets); +} + +struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) +{ + return __flow_tbl_rehash(table, table->n_buckets * 2); +} + +void ovs_flow_free(struct sw_flow *flow) +{ + if (unlikely(!flow)) + return; + + kfree((struct sf_flow_acts __force *)flow->sf_acts); + kmem_cache_free(flow_cache, flow); +} + +/* RCU callback used by ovs_flow_deferred_free. */ +static void rcu_free_flow_callback(struct rcu_head *rcu) +{ + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); + + ovs_flow_free(flow); +} + +/* Schedules 'flow' to be freed after the next RCU grace period. + * The caller must hold rcu_read_lock for this to be sensible. */ +void ovs_flow_deferred_free(struct sw_flow *flow) +{ + call_rcu(&flow->rcu, rcu_free_flow_callback); +} + +/* RCU callback used by ovs_flow_deferred_free_acts. */ +static void rcu_free_acts_callback(struct rcu_head *rcu) +{ + struct sw_flow_actions *sf_acts = container_of(rcu, + struct sw_flow_actions, rcu); + kfree(sf_acts); +} + +/* Schedules 'sf_acts' to be freed after the next RCU grace period. + * The caller must hold rcu_read_lock for this to be sensible. */ +void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) +{ + call_rcu(&sf_acts->rcu, rcu_free_acts_callback); +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ + struct qtag_prefix { + __be16 eth_type; /* ETH_P_8021Q */ + __be16 tci; + }; + struct qtag_prefix *qp; + + if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) + return 0; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + + sizeof(__be16)))) + return -ENOMEM; + + qp = (struct qtag_prefix *) skb->data; + key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); + __skb_pull(skb, sizeof(struct qtag_prefix)); + + return 0; +} + +static __be16 parse_ethertype(struct sk_buff *skb) +{ + struct llc_snap_hdr { + u8 dsap; /* Always 0xAA */ + u8 ssap; /* Always 0xAA */ + u8 ctrl; + u8 oui[3]; + __be16 ethertype; + }; + struct llc_snap_hdr *llc; + __be16 proto; + + proto = *(__be16 *) skb->data; + __skb_pull(skb, sizeof(__be16)); + + if (ntohs(proto) >= 1536) + return proto; + + if (skb->len < sizeof(struct llc_snap_hdr)) + return htons(ETH_P_802_2); + + if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr)))) + return htons(0); + + llc = (struct llc_snap_hdr *) skb->data; + if (llc->dsap != LLC_SAP_SNAP || + llc->ssap != LLC_SAP_SNAP || + (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0) + return htons(ETH_P_802_2); + + __skb_pull(skb, sizeof(struct llc_snap_hdr)); + return llc->ethertype; +} + +static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, + int *key_lenp, int nh_len) +{ + struct icmp6hdr *icmp = icmp6_hdr(skb); + int error = 0; + int key_len; + + /* The ICMPv6 type and code fields use the 16-bit transport port + * fields, so we need to store them in 16-bit network byte order. + */ + key->ipv6.tp.src = htons(icmp->icmp6_type); + key->ipv6.tp.dst = htons(icmp->icmp6_code); + key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + + if (icmp->icmp6_code == 0 && + (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || + icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) { + int icmp_len = skb->len - skb_transport_offset(skb); + struct nd_msg *nd; + int offset; + + key_len = SW_FLOW_KEY_OFFSET(ipv6.nd); + + /* In order to process neighbor discovery options, we need the + * entire packet. + */ + if (unlikely(icmp_len < sizeof(*nd))) + goto out; + if (unlikely(skb_linearize(skb))) { + error = -ENOMEM; + goto out; + } + + nd = (struct nd_msg *)skb_transport_header(skb); + key->ipv6.nd.target = nd->target; + key_len = SW_FLOW_KEY_OFFSET(ipv6.nd); + + icmp_len -= sizeof(*nd); + offset = 0; + while (icmp_len >= 8) { + struct nd_opt_hdr *nd_opt = + (struct nd_opt_hdr *)(nd->opt + offset); + int opt_len = nd_opt->nd_opt_len * 8; + + if (unlikely(!opt_len || opt_len > icmp_len)) + goto invalid; + + /* Store the link layer address if the appropriate + * option is provided. It is considered an error if + * the same link layer option is specified twice. + */ + if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR + && opt_len == 8) { + if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll))) + goto invalid; + memcpy(key->ipv6.nd.sll, + &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR + && opt_len == 8) { + if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll))) + goto invalid; + memcpy(key->ipv6.nd.tll, + &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + } + + icmp_len -= opt_len; + offset += opt_len; + } + } + + goto out; + +invalid: + memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target)); + memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll)); + memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll)); + +out: + *key_lenp = key_len; + return error; +} + +/** + * ovs_flow_extract - extracts a flow key from an Ethernet frame. + * @skb: sk_buff that contains the frame, with skb->data pointing to the + * Ethernet header + * @in_port: port number on which @skb was received. + * @key: output flow key + * @key_lenp: length of output flow key + * + * The caller must ensure that skb->len >= ETH_HLEN. + * + * Returns 0 if successful, otherwise a negative errno value. + * + * Initializes @skb header pointers as follows: + * + * - skb->mac_header: the Ethernet header. + * + * - skb->network_header: just past the Ethernet header, or just past the + * VLAN header, to the first byte of the Ethernet payload. + * + * - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6 + * on output, then just past the IP header, if one is present and + * of a correct length, otherwise the same as skb->network_header. + * For other key->dl_type values it is left untouched. + */ +int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, + int *key_lenp) +{ + int error = 0; + int key_len = SW_FLOW_KEY_OFFSET(eth); + struct ethhdr *eth; + + memset(key, 0, sizeof(*key)); + + key->phy.priority = skb->priority; + key->phy.in_port = in_port; + + skb_reset_mac_header(skb); + + /* Link layer. We are guaranteed to have at least the 14 byte Ethernet + * header in the linear data area. + */ + eth = eth_hdr(skb); + memcpy(key->eth.src, eth->h_source, ETH_ALEN); + memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); + + __skb_pull(skb, 2 * ETH_ALEN); + + if (vlan_tx_tag_present(skb)) + key->eth.tci = htons(skb->vlan_tci); + else if (eth->h_proto == htons(ETH_P_8021Q)) + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; + + key->eth.type = parse_ethertype(skb); + if (unlikely(key->eth.type == htons(0))) + return -ENOMEM; + + skb_reset_network_header(skb); + __skb_push(skb, skb->data - skb_mac_header(skb)); + + /* Network layer. */ + if (key->eth.type == htons(ETH_P_IP)) { + struct iphdr *nh; + __be16 offset; + + key_len = SW_FLOW_KEY_OFFSET(ipv4.addr); + + error = check_iphdr(skb); + if (unlikely(error)) { + if (error == -EINVAL) { + skb->transport_header = skb->network_header; + error = 0; + } + goto out; + } + + nh = ip_hdr(skb); + key->ipv4.addr.src = nh->saddr; + key->ipv4.addr.dst = nh->daddr; + + key->ip.proto = nh->protocol; + key->ip.tos = nh->tos; + key->ip.ttl = nh->ttl; + + offset = nh->frag_off & htons(IP_OFFSET); + if (offset) { + key->ip.frag = OVS_FRAG_TYPE_LATER; + goto out; + } + if (nh->frag_off & htons(IP_MF) || + skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + key->ip.frag = OVS_FRAG_TYPE_FIRST; + + /* Transport layer. */ + if (key->ip.proto == IPPROTO_TCP) { + key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); + if (tcphdr_ok(skb)) { + struct tcphdr *tcp = tcp_hdr(skb); + key->ipv4.tp.src = tcp->source; + key->ipv4.tp.dst = tcp->dest; + } + } else if (key->ip.proto == IPPROTO_UDP) { + key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); + if (udphdr_ok(skb)) { + struct udphdr *udp = udp_hdr(skb); + key->ipv4.tp.src = udp->source; + key->ipv4.tp.dst = udp->dest; + } + } else if (key->ip.proto == IPPROTO_ICMP) { + key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); + if (icmphdr_ok(skb)) { + struct icmphdr *icmp = icmp_hdr(skb); + /* The ICMP type and code fields use the 16-bit + * transport port fields, so we need to store + * them in 16-bit network byte order. */ + key->ipv4.tp.src = htons(icmp->type); + key->ipv4.tp.dst = htons(icmp->code); + } + } + + } else if (key->eth.type == htons(ETH_P_ARP) && arphdr_ok(skb)) { + struct arp_eth_header *arp; + + arp = (struct arp_eth_header *)skb_network_header(skb); + + if (arp->ar_hrd == htons(ARPHRD_ETHER) + && arp->ar_pro == htons(ETH_P_IP) + && arp->ar_hln == ETH_ALEN + && arp->ar_pln == 4) { + + /* We only match on the lower 8 bits of the opcode. */ + if (ntohs(arp->ar_op) <= 0xff) + key->ip.proto = ntohs(arp->ar_op); + + if (key->ip.proto == ARPOP_REQUEST + || key->ip.proto == ARPOP_REPLY) { + memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); + memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); + memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); + memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); + key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); + } + } + } else if (key->eth.type == htons(ETH_P_IPV6)) { + int nh_len; /* IPv6 Header + Extensions */ + + nh_len = parse_ipv6hdr(skb, key, &key_len); + if (unlikely(nh_len < 0)) { + if (nh_len == -EINVAL) + skb->transport_header = skb->network_header; + else + error = nh_len; + goto out; + } + + if (key->ip.frag == OVS_FRAG_TYPE_LATER) + goto out; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + key->ip.frag = OVS_FRAG_TYPE_FIRST; + + /* Transport layer. */ + if (key->ip.proto == NEXTHDR_TCP) { + key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + if (tcphdr_ok(skb)) { + struct tcphdr *tcp = tcp_hdr(skb); + key->ipv6.tp.src = tcp->source; + key->ipv6.tp.dst = tcp->dest; + } + } else if (key->ip.proto == NEXTHDR_UDP) { + key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + if (udphdr_ok(skb)) { + struct udphdr *udp = udp_hdr(skb); + key->ipv6.tp.src = udp->source; + key->ipv6.tp.dst = udp->dest; + } + } else if (key->ip.proto == NEXTHDR_ICMP) { + key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + if (icmp6hdr_ok(skb)) { + error = parse_icmpv6(skb, key, &key_len, nh_len); + if (error < 0) + goto out; + } + } + } + +out: + *key_lenp = key_len; + return error; +} + +u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len) +{ + return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0); +} + +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, + struct sw_flow_key *key, int key_len) +{ + struct sw_flow *flow; + struct hlist_node *n; + struct hlist_head *head; + u32 hash; + + hash = ovs_flow_hash(key, key_len); + + head = find_bucket(table, hash); + hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) { + + if (flow->hash == hash && + !memcmp(&flow->key, key, key_len)) { + return flow; + } + } + return NULL; +} + +void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(table, flow->hash); + hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); + table->count++; +} + +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) +{ + hlist_del_rcu(&flow->hash_node[table->node_ver]); + table->count--; + BUG_ON(table->count < 0); +} + +/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ +const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = -1, + [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), + [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), + [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), + [OVS_KEY_ATTR_VLAN] = sizeof(__be16), + [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), + [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), + [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), + [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), + [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), + [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), + [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), + [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), + [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), +}; + +static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, + const struct nlattr *a[], u32 *attrs) +{ + const struct ovs_key_icmp *icmp_key; + const struct ovs_key_tcp *tcp_key; + const struct ovs_key_udp *udp_key; + + switch (swkey->ip.proto) { + case IPPROTO_TCP: + if (!(*attrs & (1 << OVS_KEY_ATTR_TCP))) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_TCP); + + *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); + tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); + swkey->ipv4.tp.src = tcp_key->tcp_src; + swkey->ipv4.tp.dst = tcp_key->tcp_dst; + break; + + case IPPROTO_UDP: + if (!(*attrs & (1 << OVS_KEY_ATTR_UDP))) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_UDP); + + *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); + udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); + swkey->ipv4.tp.src = udp_key->udp_src; + swkey->ipv4.tp.dst = udp_key->udp_dst; + break; + + case IPPROTO_ICMP: + if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP))) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_ICMP); + + *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp); + icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); + swkey->ipv4.tp.src = htons(icmp_key->icmp_type); + swkey->ipv4.tp.dst = htons(icmp_key->icmp_code); + break; + } + + return 0; +} + +static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, + const struct nlattr *a[], u32 *attrs) +{ + const struct ovs_key_icmpv6 *icmpv6_key; + const struct ovs_key_tcp *tcp_key; + const struct ovs_key_udp *udp_key; + + switch (swkey->ip.proto) { + case IPPROTO_TCP: + if (!(*attrs & (1 << OVS_KEY_ATTR_TCP))) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_TCP); + + *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); + swkey->ipv6.tp.src = tcp_key->tcp_src; + swkey->ipv6.tp.dst = tcp_key->tcp_dst; + break; + + case IPPROTO_UDP: + if (!(*attrs & (1 << OVS_KEY_ATTR_UDP))) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_UDP); + + *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); + swkey->ipv6.tp.src = udp_key->udp_src; + swkey->ipv6.tp.dst = udp_key->udp_dst; + break; + + case IPPROTO_ICMPV6: + if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6))) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); + + *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp); + icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); + swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type); + swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code); + + if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || + swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { + const struct ovs_key_nd *nd_key; + + if (!(*attrs & (1 << OVS_KEY_ATTR_ND))) + return -EINVAL; + *attrs &= ~(1 << OVS_KEY_ATTR_ND); + + *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd); + nd_key = nla_data(a[OVS_KEY_ATTR_ND]); + memcpy(&swkey->ipv6.nd.target, nd_key->nd_target, + sizeof(swkey->ipv6.nd.target)); + memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN); + memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN); + } + break; + } + + return 0; +} + +static int parse_flow_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], u32 *attrsp) +{ + const struct nlattr *nla; + u32 attrs; + int rem; + + attrs = 0; + nla_for_each_nested(nla, attr, rem) { + u16 type = nla_type(nla); + int expected_len; + + if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type)) + return -EINVAL; + + expected_len = ovs_key_lens[type]; + if (nla_len(nla) != expected_len && expected_len != -1) + return -EINVAL; + + attrs |= 1 << type; + a[type] = nla; + } + if (rem) + return -EINVAL; + + *attrsp = attrs; + return 0; +} + +/** + * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. + * @swkey: receives the extracted flow key. + * @key_lenp: number of bytes used in @swkey. + * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * sequence. + */ +int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, + const struct nlattr *attr) +{ + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + const struct ovs_key_ethernet *eth_key; + int key_len; + u32 attrs; + int err; + + memset(swkey, 0, sizeof(struct sw_flow_key)); + key_len = SW_FLOW_KEY_OFFSET(eth); + + err = parse_flow_nlattrs(attr, a, &attrs); + if (err) + return err; + + /* Metadata attributes. */ + if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { + swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]); + attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); + } + if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { + u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); + if (in_port >= DP_MAX_PORTS) + return -EINVAL; + swkey->phy.in_port = in_port; + attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); + } else { + swkey->phy.in_port = USHRT_MAX; + } + + /* Data attributes. */ + if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) + return -EINVAL; + attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); + + eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); + memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN); + memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN); + + if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) && + nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) { + const struct nlattr *encap; + __be16 tci; + + if (attrs != ((1 << OVS_KEY_ATTR_VLAN) | + (1 << OVS_KEY_ATTR_ETHERTYPE) | + (1 << OVS_KEY_ATTR_ENCAP))) + return -EINVAL; + + encap = a[OVS_KEY_ATTR_ENCAP]; + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + if (tci & htons(VLAN_TAG_PRESENT)) { + swkey->eth.tci = tci; + + err = parse_flow_nlattrs(encap, a, &attrs); + if (err) + return err; + } else if (!tci) { + /* Corner case for truncated 802.1Q header. */ + if (nla_len(encap)) + return -EINVAL; + + swkey->eth.type = htons(ETH_P_8021Q); + *key_lenp = key_len; + return 0; + } else { + return -EINVAL; + } + } + + if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { + swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + if (ntohs(swkey->eth.type) < 1536) + return -EINVAL; + attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + } else { + swkey->eth.type = htons(ETH_P_802_2); + } + + if (swkey->eth.type == htons(ETH_P_IP)) { + const struct ovs_key_ipv4 *ipv4_key; + + if (!(attrs & (1 << OVS_KEY_ATTR_IPV4))) + return -EINVAL; + attrs &= ~(1 << OVS_KEY_ATTR_IPV4); + + key_len = SW_FLOW_KEY_OFFSET(ipv4.addr); + ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); + if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) + return -EINVAL; + swkey->ip.proto = ipv4_key->ipv4_proto; + swkey->ip.tos = ipv4_key->ipv4_tos; + swkey->ip.ttl = ipv4_key->ipv4_ttl; + swkey->ip.frag = ipv4_key->ipv4_frag; + swkey->ipv4.addr.src = ipv4_key->ipv4_src; + swkey->ipv4.addr.dst = ipv4_key->ipv4_dst; + + if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) { + err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs); + if (err) + return err; + } + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + const struct ovs_key_ipv6 *ipv6_key; + + if (!(attrs & (1 << OVS_KEY_ATTR_IPV6))) + return -EINVAL; + attrs &= ~(1 << OVS_KEY_ATTR_IPV6); + + key_len = SW_FLOW_KEY_OFFSET(ipv6.label); + ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); + if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) + return -EINVAL; + swkey->ipv6.label = ipv6_key->ipv6_label; + swkey->ip.proto = ipv6_key->ipv6_proto; + swkey->ip.tos = ipv6_key->ipv6_tclass; + swkey->ip.ttl = ipv6_key->ipv6_hlimit; + swkey->ip.frag = ipv6_key->ipv6_frag; + memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src, + sizeof(swkey->ipv6.addr.src)); + memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst, + sizeof(swkey->ipv6.addr.dst)); + + if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) { + err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs); + if (err) + return err; + } + } else if (swkey->eth.type == htons(ETH_P_ARP)) { + const struct ovs_key_arp *arp_key; + + if (!(attrs & (1 << OVS_KEY_ATTR_ARP))) + return -EINVAL; + attrs &= ~(1 << OVS_KEY_ATTR_ARP); + + key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); + arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); + swkey->ipv4.addr.src = arp_key->arp_sip; + swkey->ipv4.addr.dst = arp_key->arp_tip; + if (arp_key->arp_op & htons(0xff00)) + return -EINVAL; + swkey->ip.proto = ntohs(arp_key->arp_op); + memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN); + memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN); + } + + if (attrs) + return -EINVAL; + *key_lenp = key_len; + + return 0; +} + +/** + * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. + * @in_port: receives the extracted input port. + * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * sequence. + * + * This parses a series of Netlink attributes that form a flow key, which must + * take the same form accepted by flow_from_nlattrs(), but only enough of it to + * get the metadata, that is, the parts of the flow key that cannot be + * extracted from the packet itself. + */ +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, + const struct nlattr *attr) +{ + const struct nlattr *nla; + int rem; + + *in_port = USHRT_MAX; + *priority = 0; + + nla_for_each_nested(nla, attr, rem) { + int type = nla_type(nla); + + if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { + if (nla_len(nla) != ovs_key_lens[type]) + return -EINVAL; + + switch (type) { + case OVS_KEY_ATTR_PRIORITY: + *priority = nla_get_u32(nla); + break; + + case OVS_KEY_ATTR_IN_PORT: + if (nla_get_u32(nla) >= DP_MAX_PORTS) + return -EINVAL; + *in_port = nla_get_u32(nla); + break; + } + } + } + if (rem) + return -EINVAL; + return 0; +} + +int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) +{ + struct ovs_key_ethernet *eth_key; + struct nlattr *nla, *encap; + + if (swkey->phy.priority) + NLA_PUT_U32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority); + + if (swkey->phy.in_port != USHRT_MAX) + NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port); + + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); + if (!nla) + goto nla_put_failure; + eth_key = nla_data(nla); + memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN); + memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN); + + if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { + NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)); + NLA_PUT_BE16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci); + encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.tci) + goto unencap; + } else { + encap = NULL; + } + + if (swkey->eth.type == htons(ETH_P_802_2)) + goto unencap; + + NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type); + + if (swkey->eth.type == htons(ETH_P_IP)) { + struct ovs_key_ipv4 *ipv4_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); + if (!nla) + goto nla_put_failure; + ipv4_key = nla_data(nla); + ipv4_key->ipv4_src = swkey->ipv4.addr.src; + ipv4_key->ipv4_dst = swkey->ipv4.addr.dst; + ipv4_key->ipv4_proto = swkey->ip.proto; + ipv4_key->ipv4_tos = swkey->ip.tos; + ipv4_key->ipv4_ttl = swkey->ip.ttl; + ipv4_key->ipv4_frag = swkey->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + struct ovs_key_ipv6 *ipv6_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); + if (!nla) + goto nla_put_failure; + ipv6_key = nla_data(nla); + memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src, + sizeof(ipv6_key->ipv6_src)); + memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst, + sizeof(ipv6_key->ipv6_dst)); + ipv6_key->ipv6_label = swkey->ipv6.label; + ipv6_key->ipv6_proto = swkey->ip.proto; + ipv6_key->ipv6_tclass = swkey->ip.tos; + ipv6_key->ipv6_hlimit = swkey->ip.ttl; + ipv6_key->ipv6_frag = swkey->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_ARP)) { + struct ovs_key_arp *arp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); + if (!nla) + goto nla_put_failure; + arp_key = nla_data(nla); + memset(arp_key, 0, sizeof(struct ovs_key_arp)); + arp_key->arp_sip = swkey->ipv4.addr.src; + arp_key->arp_tip = swkey->ipv4.addr.dst; + arp_key->arp_op = htons(swkey->ip.proto); + memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN); + memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN); + } + + if ((swkey->eth.type == htons(ETH_P_IP) || + swkey->eth.type == htons(ETH_P_IPV6)) && + swkey->ip.frag != OVS_FRAG_TYPE_LATER) { + + if (swkey->ip.proto == IPPROTO_TCP) { + struct ovs_key_tcp *tcp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); + if (!nla) + goto nla_put_failure; + tcp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + tcp_key->tcp_src = swkey->ipv4.tp.src; + tcp_key->tcp_dst = swkey->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + tcp_key->tcp_src = swkey->ipv6.tp.src; + tcp_key->tcp_dst = swkey->ipv6.tp.dst; + } + } else if (swkey->ip.proto == IPPROTO_UDP) { + struct ovs_key_udp *udp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); + if (!nla) + goto nla_put_failure; + udp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + udp_key->udp_src = swkey->ipv4.tp.src; + udp_key->udp_dst = swkey->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + udp_key->udp_src = swkey->ipv6.tp.src; + udp_key->udp_dst = swkey->ipv6.tp.dst; + } + } else if (swkey->eth.type == htons(ETH_P_IP) && + swkey->ip.proto == IPPROTO_ICMP) { + struct ovs_key_icmp *icmp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); + if (!nla) + goto nla_put_failure; + icmp_key = nla_data(nla); + icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src); + icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst); + } else if (swkey->eth.type == htons(ETH_P_IPV6) && + swkey->ip.proto == IPPROTO_ICMPV6) { + struct ovs_key_icmpv6 *icmpv6_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, + sizeof(*icmpv6_key)); + if (!nla) + goto nla_put_failure; + icmpv6_key = nla_data(nla); + icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src); + icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst); + + if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || + icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { + struct ovs_key_nd *nd_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); + if (!nla) + goto nla_put_failure; + nd_key = nla_data(nla); + memcpy(nd_key->nd_target, &swkey->ipv6.nd.target, + sizeof(nd_key->nd_target)); + memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN); + memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN); + } + } + } + +unencap: + if (encap) + nla_nest_end(skb, encap); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +/* Initializes the flow module. + * Returns zero if successful or a negative error code. */ +int ovs_flow_init(void) +{ + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, + 0, NULL); + if (flow_cache == NULL) + return -ENOMEM; + + return 0; +} + +/* Uninitializes the flow module. */ +void ovs_flow_exit(void) +{ + kmem_cache_destroy(flow_cache); +} diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h new file mode 100644 index 000000000000..2747dc2c4ac1 --- /dev/null +++ b/net/openvswitch/flow.h @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef FLOW_H +#define FLOW_H 1 + +#include <linux/kernel.h> +#include <linux/netlink.h> +#include <linux/openvswitch.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/if_ether.h> +#include <linux/in6.h> +#include <linux/jiffies.h> +#include <linux/time.h> +#include <linux/flex_array.h> +#include <net/inet_ecn.h> + +struct sk_buff; + +struct sw_flow_actions { + struct rcu_head rcu; + u32 actions_len; + struct nlattr actions[]; +}; + +struct sw_flow_key { + struct { + u32 priority; /* Packet QoS priority. */ + u16 in_port; /* Input switch port (or USHRT_MAX). */ + } phy; + struct { + u8 src[ETH_ALEN]; /* Ethernet source address. */ + u8 dst[ETH_ALEN]; /* Ethernet destination address. */ + __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ + __be16 type; /* Ethernet frame type. */ + } eth; + struct { + u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ + u8 tos; /* IP ToS. */ + u8 ttl; /* IP TTL/hop limit. */ + u8 frag; /* One of OVS_FRAG_TYPE_*. */ + } ip; + union { + struct { + struct { + __be32 src; /* IP source address. */ + __be32 dst; /* IP destination address. */ + } addr; + union { + struct { + __be16 src; /* TCP/UDP source port. */ + __be16 dst; /* TCP/UDP destination port. */ + } tp; + struct { + u8 sha[ETH_ALEN]; /* ARP source hardware address. */ + u8 tha[ETH_ALEN]; /* ARP target hardware address. */ + } arp; + }; + } ipv4; + struct { + struct { + struct in6_addr src; /* IPv6 source address. */ + struct in6_addr dst; /* IPv6 destination address. */ + } addr; + __be32 label; /* IPv6 flow label. */ + struct { + __be16 src; /* TCP/UDP source port. */ + __be16 dst; /* TCP/UDP destination port. */ + } tp; + struct { + struct in6_addr target; /* ND target address. */ + u8 sll[ETH_ALEN]; /* ND source link layer address. */ + u8 tll[ETH_ALEN]; /* ND target link layer address. */ + } nd; + } ipv6; + }; +}; + +struct sw_flow { + struct rcu_head rcu; + struct hlist_node hash_node[2]; + u32 hash; + + struct sw_flow_key key; + struct sw_flow_actions __rcu *sf_acts; + + spinlock_t lock; /* Lock for values below. */ + unsigned long used; /* Last used time (in jiffies). */ + u64 packet_count; /* Number of packets matched. */ + u64 byte_count; /* Number of bytes matched. */ + u8 tcp_flags; /* Union of seen TCP flags. */ +}; + +struct arp_eth_header { + __be16 ar_hrd; /* format of hardware address */ + __be16 ar_pro; /* format of protocol address */ + unsigned char ar_hln; /* length of hardware address */ + unsigned char ar_pln; /* length of protocol address */ + __be16 ar_op; /* ARP opcode (command) */ + + /* Ethernet+IPv4 specific members. */ + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ + unsigned char ar_sip[4]; /* sender IP address */ + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ + unsigned char ar_tip[4]; /* target IP address */ +} __packed; + +int ovs_flow_init(void); +void ovs_flow_exit(void); + +struct sw_flow *ovs_flow_alloc(void); +void ovs_flow_deferred_free(struct sw_flow *); +void ovs_flow_free(struct sw_flow *flow); + +struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *); +void ovs_flow_deferred_free_acts(struct sw_flow_actions *); + +int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, + int *key_lenp); +void ovs_flow_used(struct sw_flow *, struct sk_buff *); +u64 ovs_flow_used_time(unsigned long flow_jiffies); + +/* Upper bound on the length of a nlattr-formatted flow key. The longest + * nlattr-formatted flow key would be: + * + * struct pad nl hdr total + * ------ --- ------ ----- + * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 + * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 + * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 + * OVS_KEY_ATTR_8021Q 4 -- 4 8 + * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 + * OVS_KEY_ATTR_IPV6 40 -- 4 44 + * OVS_KEY_ATTR_ICMPV6 2 2 4 8 + * OVS_KEY_ATTR_ND 28 -- 4 32 + * ------------------------------------------------- + * total 132 + */ +#define FLOW_BUFSIZE 132 + +int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); +int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, + const struct nlattr *); +int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, + const struct nlattr *); + +#define TBL_MIN_BUCKETS 1024 + +struct flow_table { + struct flex_array *buckets; + unsigned int count, n_buckets; + struct rcu_head rcu; + int node_ver; + u32 hash_seed; + bool keep_flows; +}; + +static inline int ovs_flow_tbl_count(struct flow_table *table) +{ + return table->count; +} + +static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table) +{ + return (table->count > table->n_buckets); +} + +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, + struct sw_flow_key *key, int len); +void ovs_flow_tbl_destroy(struct flow_table *table); +void ovs_flow_tbl_deferred_destroy(struct flow_table *table); +struct flow_table *ovs_flow_tbl_alloc(int new_size); +struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); +struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); +void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow); +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); +u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len); + +struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); +extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; + +#endif /* flow.h */ diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c new file mode 100644 index 000000000000..8fc28b86f2b3 --- /dev/null +++ b/net/openvswitch/vport-internal_dev.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include <linux/hardirq.h> +#include <linux/if_vlan.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/skbuff.h> +#include <linux/version.h> + +#include "datapath.h" +#include "vport-internal_dev.h" +#include "vport-netdev.h" + +struct internal_dev { + struct vport *vport; +}; + +static struct internal_dev *internal_dev_priv(struct net_device *netdev) +{ + return netdev_priv(netdev); +} + +/* This function is only called by the kernel network layer.*/ +static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct vport *vport = ovs_internal_dev_get_vport(netdev); + struct ovs_vport_stats vport_stats; + + ovs_vport_get_stats(vport, &vport_stats); + + /* The tx and rx stats need to be swapped because the + * switch and host OS have opposite perspectives. */ + stats->rx_packets = vport_stats.tx_packets; + stats->tx_packets = vport_stats.rx_packets; + stats->rx_bytes = vport_stats.tx_bytes; + stats->tx_bytes = vport_stats.rx_bytes; + stats->rx_errors = vport_stats.tx_errors; + stats->tx_errors = vport_stats.rx_errors; + stats->rx_dropped = vport_stats.tx_dropped; + stats->tx_dropped = vport_stats.rx_dropped; + + return stats; +} + +static int internal_dev_mac_addr(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return 0; +} + +/* Called with rcu_read_lock_bh. */ +static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + rcu_read_lock(); + ovs_vport_receive(internal_dev_priv(netdev)->vport, skb); + rcu_read_unlock(); + return 0; +} + +static int internal_dev_open(struct net_device *netdev) +{ + netif_start_queue(netdev); + return 0; +} + +static int internal_dev_stop(struct net_device *netdev) +{ + netif_stop_queue(netdev); + return 0; +} + +static void internal_dev_getinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + strcpy(info->driver, "openvswitch"); +} + +static const struct ethtool_ops internal_dev_ethtool_ops = { + .get_drvinfo = internal_dev_getinfo, + .get_link = ethtool_op_get_link, +}; + +static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu) +{ + if (new_mtu < 68) + return -EINVAL; + + netdev->mtu = new_mtu; + return 0; +} + +static void internal_dev_destructor(struct net_device *dev) +{ + struct vport *vport = ovs_internal_dev_get_vport(dev); + + ovs_vport_free(vport); + free_netdev(dev); +} + +static const struct net_device_ops internal_dev_netdev_ops = { + .ndo_open = internal_dev_open, + .ndo_stop = internal_dev_stop, + .ndo_start_xmit = internal_dev_xmit, + .ndo_set_mac_address = internal_dev_mac_addr, + .ndo_change_mtu = internal_dev_change_mtu, + .ndo_get_stats64 = internal_dev_get_stats, +}; + +static void do_setup(struct net_device *netdev) +{ + ether_setup(netdev); + + netdev->netdev_ops = &internal_dev_netdev_ops; + + netdev->priv_flags &= ~IFF_TX_SKB_SHARING; + netdev->destructor = internal_dev_destructor; + SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); + netdev->tx_queue_len = 0; + + netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | + NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; + + netdev->vlan_features = netdev->features; + netdev->features |= NETIF_F_HW_VLAN_TX; + netdev->hw_features = netdev->features & ~NETIF_F_LLTX; + random_ether_addr(netdev->dev_addr); +} + +static struct vport *internal_dev_create(const struct vport_parms *parms) +{ + struct vport *vport; + struct netdev_vport *netdev_vport; + struct internal_dev *internal_dev; + int err; + + vport = ovs_vport_alloc(sizeof(struct netdev_vport), + &ovs_internal_vport_ops, parms); + if (IS_ERR(vport)) { + err = PTR_ERR(vport); + goto error; + } + + netdev_vport = netdev_vport_priv(vport); + + netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev), + parms->name, do_setup); + if (!netdev_vport->dev) { + err = -ENOMEM; + goto error_free_vport; + } + + internal_dev = internal_dev_priv(netdev_vport->dev); + internal_dev->vport = vport; + + err = register_netdevice(netdev_vport->dev); + if (err) + goto error_free_netdev; + + dev_set_promiscuity(netdev_vport->dev, 1); + netif_start_queue(netdev_vport->dev); + + return vport; + +error_free_netdev: + free_netdev(netdev_vport->dev); +error_free_vport: + ovs_vport_free(vport); +error: + return ERR_PTR(err); +} + +static void internal_dev_destroy(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + netif_stop_queue(netdev_vport->dev); + dev_set_promiscuity(netdev_vport->dev, -1); + + /* unregister_netdevice() waits for an RCU grace period. */ + unregister_netdevice(netdev_vport->dev); +} + +static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) +{ + struct net_device *netdev = netdev_vport_priv(vport)->dev; + int len; + + len = skb->len; + skb->dev = netdev; + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, netdev); + + netif_rx(skb); + + return len; +} + +const struct vport_ops ovs_internal_vport_ops = { + .type = OVS_VPORT_TYPE_INTERNAL, + .create = internal_dev_create, + .destroy = internal_dev_destroy, + .get_name = ovs_netdev_get_name, + .get_ifindex = ovs_netdev_get_ifindex, + .send = internal_dev_recv, +}; + +int ovs_is_internal_dev(const struct net_device *netdev) +{ + return netdev->netdev_ops == &internal_dev_netdev_ops; +} + +struct vport *ovs_internal_dev_get_vport(struct net_device *netdev) +{ + if (!ovs_is_internal_dev(netdev)) + return NULL; + + return internal_dev_priv(netdev)->vport; +} diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h new file mode 100644 index 000000000000..3454447c5f11 --- /dev/null +++ b/net/openvswitch/vport-internal_dev.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef VPORT_INTERNAL_DEV_H +#define VPORT_INTERNAL_DEV_H 1 + +#include "datapath.h" +#include "vport.h" + +int ovs_is_internal_dev(const struct net_device *); +struct vport *ovs_internal_dev_get_vport(struct net_device *); + +#endif /* vport-internal_dev.h */ diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c new file mode 100644 index 000000000000..c1068aed03d1 --- /dev/null +++ b/net/openvswitch/vport-netdev.c @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/if_arp.h> +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> +#include <linux/kernel.h> +#include <linux/llc.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> + +#include <net/llc.h> + +#include "datapath.h" +#include "vport-internal_dev.h" +#include "vport-netdev.h" + +/* Must be called with rcu_read_lock. */ +static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) +{ + if (unlikely(!vport)) { + kfree_skb(skb); + return; + } + + /* Make our own copy of the packet. Otherwise we will mangle the + * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). + * (No one comes after us, since we tell handle_bridge() that we took + * the packet.) */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return; + + skb_push(skb, ETH_HLEN); + ovs_vport_receive(vport, skb); +} + +/* Called with rcu_read_lock and bottom-halves disabled. */ +static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct vport *vport; + + if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) + return RX_HANDLER_PASS; + + vport = ovs_netdev_get_vport(skb->dev); + + netdev_port_receive(vport, skb); + + return RX_HANDLER_CONSUMED; +} + +static struct vport *netdev_create(const struct vport_parms *parms) +{ + struct vport *vport; + struct netdev_vport *netdev_vport; + int err; + + vport = ovs_vport_alloc(sizeof(struct netdev_vport), + &ovs_netdev_vport_ops, parms); + if (IS_ERR(vport)) { + err = PTR_ERR(vport); + goto error; + } + + netdev_vport = netdev_vport_priv(vport); + + netdev_vport->dev = dev_get_by_name(&init_net, parms->name); + if (!netdev_vport->dev) { + err = -ENODEV; + goto error_free_vport; + } + + if (netdev_vport->dev->flags & IFF_LOOPBACK || + netdev_vport->dev->type != ARPHRD_ETHER || + ovs_is_internal_dev(netdev_vport->dev)) { + err = -EINVAL; + goto error_put; + } + + err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, + vport); + if (err) + goto error_put; + + dev_set_promiscuity(netdev_vport->dev, 1); + netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; + + return vport; + +error_put: + dev_put(netdev_vport->dev); +error_free_vport: + ovs_vport_free(vport); +error: + return ERR_PTR(err); +} + +static void netdev_destroy(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; + netdev_rx_handler_unregister(netdev_vport->dev); + dev_set_promiscuity(netdev_vport->dev, -1); + + synchronize_rcu(); + + dev_put(netdev_vport->dev); + ovs_vport_free(vport); +} + +const char *ovs_netdev_get_name(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->name; +} + +int ovs_netdev_get_ifindex(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->ifindex; +} + +static unsigned packet_length(const struct sk_buff *skb) +{ + unsigned length = skb->len - ETH_HLEN; + + if (skb->protocol == htons(ETH_P_8021Q)) + length -= VLAN_HLEN; + + return length; +} + +static int netdev_send(struct vport *vport, struct sk_buff *skb) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + int mtu = netdev_vport->dev->mtu; + int len; + + if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { + if (net_ratelimit()) + pr_warn("%s: dropped over-mtu packet: %d > %d\n", + ovs_dp_name(vport->dp), packet_length(skb), mtu); + goto error; + } + + if (unlikely(skb_warn_if_lro(skb))) + goto error; + + skb->dev = netdev_vport->dev; + len = skb->len; + dev_queue_xmit(skb); + + return len; + +error: + kfree_skb(skb); + ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); + return 0; +} + +/* Returns null if this device is not attached to a datapath. */ +struct vport *ovs_netdev_get_vport(struct net_device *dev) +{ + if (likely(dev->priv_flags & IFF_OVS_DATAPATH)) + return (struct vport *) + rcu_dereference_rtnl(dev->rx_handler_data); + else + return NULL; +} + +const struct vport_ops ovs_netdev_vport_ops = { + .type = OVS_VPORT_TYPE_NETDEV, + .create = netdev_create, + .destroy = netdev_destroy, + .get_name = ovs_netdev_get_name, + .get_ifindex = ovs_netdev_get_ifindex, + .send = netdev_send, +}; diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h new file mode 100644 index 000000000000..fd9b008a0e6e --- /dev/null +++ b/net/openvswitch/vport-netdev.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef VPORT_NETDEV_H +#define VPORT_NETDEV_H 1 + +#include <linux/netdevice.h> + +#include "vport.h" + +struct vport *ovs_netdev_get_vport(struct net_device *dev); + +struct netdev_vport { + struct net_device *dev; +}; + +static inline struct netdev_vport * +netdev_vport_priv(const struct vport *vport) +{ + return vport_priv(vport); +} + +const char *ovs_netdev_get_name(const struct vport *); +const char *ovs_netdev_get_config(const struct vport *); +int ovs_netdev_get_ifindex(const struct vport *); + +#endif /* vport_netdev.h */ diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c new file mode 100644 index 000000000000..7f0ef3794c51 --- /dev/null +++ b/net/openvswitch/vport.c @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include <linux/dcache.h> +#include <linux/etherdevice.h> +#include <linux/if.h> +#include <linux/if_vlan.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/rcupdate.h> +#include <linux/rtnetlink.h> +#include <linux/compat.h> +#include <linux/version.h> + +#include "vport.h" +#include "vport-internal_dev.h" + +/* List of statically compiled vport implementations. Don't forget to also + * add yours to the list at the bottom of vport.h. */ +static const struct vport_ops *vport_ops_list[] = { + &ovs_netdev_vport_ops, + &ovs_internal_vport_ops, +}; + +/* Protected by RCU read lock for reading, RTNL lock for writing. */ +static struct hlist_head *dev_table; +#define VPORT_HASH_BUCKETS 1024 + +/** + * ovs_vport_init - initialize vport subsystem + * + * Called at module load time to initialize the vport subsystem. + */ +int ovs_vport_init(void) +{ + dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head), + GFP_KERNEL); + if (!dev_table) + return -ENOMEM; + + return 0; +} + +/** + * ovs_vport_exit - shutdown vport subsystem + * + * Called at module exit time to shutdown the vport subsystem. + */ +void ovs_vport_exit(void) +{ + kfree(dev_table); +} + +static struct hlist_head *hash_bucket(const char *name) +{ + unsigned int hash = full_name_hash(name, strlen(name)); + return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; +} + +/** + * ovs_vport_locate - find a port that has already been created + * + * @name: name of port to find + * + * Must be called with RTNL or RCU read lock. + */ +struct vport *ovs_vport_locate(const char *name) +{ + struct hlist_head *bucket = hash_bucket(name); + struct vport *vport; + struct hlist_node *node; + + hlist_for_each_entry_rcu(vport, node, bucket, hash_node) + if (!strcmp(name, vport->ops->get_name(vport))) + return vport; + + return NULL; +} + +/** + * ovs_vport_alloc - allocate and initialize new vport + * + * @priv_size: Size of private data area to allocate. + * @ops: vport device ops + * + * Allocate and initialize a new vport defined by @ops. The vport will contain + * a private data area of size @priv_size that can be accessed using + * vport_priv(). vports that are no longer needed should be released with + * vport_free(). + */ +struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, + const struct vport_parms *parms) +{ + struct vport *vport; + size_t alloc_size; + + alloc_size = sizeof(struct vport); + if (priv_size) { + alloc_size = ALIGN(alloc_size, VPORT_ALIGN); + alloc_size += priv_size; + } + + vport = kzalloc(alloc_size, GFP_KERNEL); + if (!vport) + return ERR_PTR(-ENOMEM); + + vport->dp = parms->dp; + vport->port_no = parms->port_no; + vport->upcall_pid = parms->upcall_pid; + vport->ops = ops; + + vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); + if (!vport->percpu_stats) { + kfree(vport); + return ERR_PTR(-ENOMEM); + } + + spin_lock_init(&vport->stats_lock); + + return vport; +} + +/** + * ovs_vport_free - uninitialize and free vport + * + * @vport: vport to free + * + * Frees a vport allocated with vport_alloc() when it is no longer needed. + * + * The caller must ensure that an RCU grace period has passed since the last + * time @vport was in a datapath. + */ +void ovs_vport_free(struct vport *vport) +{ + free_percpu(vport->percpu_stats); + kfree(vport); +} + +/** + * ovs_vport_add - add vport device (for kernel callers) + * + * @parms: Information about new vport. + * + * Creates a new vport with the specified configuration (which is dependent on + * device type). RTNL lock must be held. + */ +struct vport *ovs_vport_add(const struct vport_parms *parms) +{ + struct vport *vport; + int err = 0; + int i; + + ASSERT_RTNL(); + + for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { + if (vport_ops_list[i]->type == parms->type) { + vport = vport_ops_list[i]->create(parms); + if (IS_ERR(vport)) { + err = PTR_ERR(vport); + goto out; + } + + hlist_add_head_rcu(&vport->hash_node, + hash_bucket(vport->ops->get_name(vport))); + return vport; + } + } + + err = -EAFNOSUPPORT; + +out: + return ERR_PTR(err); +} + +/** + * ovs_vport_set_options - modify existing vport device (for kernel callers) + * + * @vport: vport to modify. + * @port: New configuration. + * + * Modifies an existing device with the specified configuration (which is + * dependent on device type). RTNL lock must be held. + */ +int ovs_vport_set_options(struct vport *vport, struct nlattr *options) +{ + ASSERT_RTNL(); + + if (!vport->ops->set_options) + return -EOPNOTSUPP; + return vport->ops->set_options(vport, options); +} + +/** + * ovs_vport_del - delete existing vport device + * + * @vport: vport to delete. + * + * Detaches @vport from its datapath and destroys it. It is possible to fail + * for reasons such as lack of memory. RTNL lock must be held. + */ +void ovs_vport_del(struct vport *vport) +{ + ASSERT_RTNL(); + + hlist_del_rcu(&vport->hash_node); + + vport->ops->destroy(vport); +} + +/** + * ovs_vport_get_stats - retrieve device stats + * + * @vport: vport from which to retrieve the stats + * @stats: location to store stats + * + * Retrieves transmit, receive, and error stats for the given device. + * + * Must be called with RTNL lock or rcu_read_lock. + */ +void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + + /* We potentially have 2 sources of stats that need to be combined: + * those we have collected (split into err_stats and percpu_stats) from + * set_stats() and device error stats from netdev->get_stats() (for + * errors that happen downstream and therefore aren't reported through + * our vport_record_error() function). + * Stats from first source are reported by ovs (OVS_VPORT_ATTR_STATS). + * netdev-stats can be directly read over netlink-ioctl. + */ + + spin_lock_bh(&vport->stats_lock); + + stats->rx_errors = vport->err_stats.rx_errors; + stats->tx_errors = vport->err_stats.tx_errors; + stats->tx_dropped = vport->err_stats.tx_dropped; + stats->rx_dropped = vport->err_stats.rx_dropped; + + spin_unlock_bh(&vport->stats_lock); + + for_each_possible_cpu(i) { + const struct vport_percpu_stats *percpu_stats; + struct vport_percpu_stats local_stats; + unsigned int start; + + percpu_stats = per_cpu_ptr(vport->percpu_stats, i); + + do { + start = u64_stats_fetch_begin_bh(&percpu_stats->sync); + local_stats = *percpu_stats; + } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); + + stats->rx_bytes += local_stats.rx_bytes; + stats->rx_packets += local_stats.rx_packets; + stats->tx_bytes += local_stats.tx_bytes; + stats->tx_packets += local_stats.tx_packets; + } +} + +/** + * ovs_vport_get_options - retrieve device options + * + * @vport: vport from which to retrieve the options. + * @skb: sk_buff where options should be appended. + * + * Retrieves the configuration of the given device, appending an + * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested + * vport-specific attributes to @skb. + * + * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another + * negative error code if a real error occurred. If an error occurs, @skb is + * left unmodified. + * + * Must be called with RTNL lock or rcu_read_lock. + */ +int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) +{ + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); + if (!nla) + return -EMSGSIZE; + + if (vport->ops->get_options) { + int err = vport->ops->get_options(vport, skb); + if (err) { + nla_nest_cancel(skb, nla); + return err; + } + } + + nla_nest_end(skb, nla); + return 0; +} + +/** + * ovs_vport_receive - pass up received packet to the datapath for processing + * + * @vport: vport that received the packet + * @skb: skb that was received + * + * Must be called with rcu_read_lock. The packet cannot be shared and + * skb->data should point to the Ethernet header. The caller must have already + * called compute_ip_summed() to initialize the checksumming fields. + */ +void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) +{ + struct vport_percpu_stats *stats; + + stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); + + u64_stats_update_begin(&stats->sync); + stats->rx_packets++; + stats->rx_bytes += skb->len; + u64_stats_update_end(&stats->sync); + + ovs_dp_process_received_packet(vport, skb); +} + +/** + * ovs_vport_send - send a packet on a device + * + * @vport: vport on which to send the packet + * @skb: skb to send + * + * Sends the given packet and returns the length of data sent. Either RTNL + * lock or rcu_read_lock must be held. + */ +int ovs_vport_send(struct vport *vport, struct sk_buff *skb) +{ + int sent = vport->ops->send(vport, skb); + + if (likely(sent)) { + struct vport_percpu_stats *stats; + + stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); + + u64_stats_update_begin(&stats->sync); + stats->tx_packets++; + stats->tx_bytes += sent; + u64_stats_update_end(&stats->sync); + } + return sent; +} + +/** + * ovs_vport_record_error - indicate device error to generic stats layer + * + * @vport: vport that encountered the error + * @err_type: one of enum vport_err_type types to indicate the error type + * + * If using the vport generic stats layer indicate that an error of the given + * type has occured. + */ +void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) +{ + spin_lock(&vport->stats_lock); + + switch (err_type) { + case VPORT_E_RX_DROPPED: + vport->err_stats.rx_dropped++; + break; + + case VPORT_E_RX_ERROR: + vport->err_stats.rx_errors++; + break; + + case VPORT_E_TX_DROPPED: + vport->err_stats.tx_dropped++; + break; + + case VPORT_E_TX_ERROR: + vport->err_stats.tx_errors++; + break; + }; + + spin_unlock(&vport->stats_lock); +} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h new file mode 100644 index 000000000000..19609629dabd --- /dev/null +++ b/net/openvswitch/vport.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2007-2011 Nicira Networks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef VPORT_H +#define VPORT_H 1 + +#include <linux/list.h> +#include <linux/openvswitch.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/u64_stats_sync.h> + +#include "datapath.h" + +struct vport; +struct vport_parms; + +/* The following definitions are for users of the vport subsytem: */ + +int ovs_vport_init(void); +void ovs_vport_exit(void); + +struct vport *ovs_vport_add(const struct vport_parms *); +void ovs_vport_del(struct vport *); + +struct vport *ovs_vport_locate(const char *name); + +void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); + +int ovs_vport_set_options(struct vport *, struct nlattr *options); +int ovs_vport_get_options(const struct vport *, struct sk_buff *); + +int ovs_vport_send(struct vport *, struct sk_buff *); + +/* The following definitions are for implementers of vport devices: */ + +struct vport_percpu_stats { + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; + struct u64_stats_sync sync; +}; + +struct vport_err_stats { + u64 rx_dropped; + u64 rx_errors; + u64 tx_dropped; + u64 tx_errors; +}; + +/** + * struct vport - one port within a datapath + * @rcu: RCU callback head for deferred destruction. + * @port_no: Index into @dp's @ports array. + * @dp: Datapath to which this port belongs. + * @node: Element in @dp's @port_list. + * @upcall_pid: The Netlink port to use for packets received on this port that + * miss the flow table. + * @hash_node: Element in @dev_table hash table in vport.c. + * @ops: Class structure. + * @percpu_stats: Points to per-CPU statistics used and maintained by vport + * @stats_lock: Protects @err_stats; + * @err_stats: Points to error statistics used and maintained by vport + */ +struct vport { + struct rcu_head rcu; + u16 port_no; + struct datapath *dp; + struct list_head node; + u32 upcall_pid; + + struct hlist_node hash_node; + const struct vport_ops *ops; + + struct vport_percpu_stats __percpu *percpu_stats; + + spinlock_t stats_lock; + struct vport_err_stats err_stats; +}; + +/** + * struct vport_parms - parameters for creating a new vport + * + * @name: New vport's name. + * @type: New vport's type. + * @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if + * none was supplied. + * @dp: New vport's datapath. + * @port_no: New vport's port number. + */ +struct vport_parms { + const char *name; + enum ovs_vport_type type; + struct nlattr *options; + + /* For ovs_vport_alloc(). */ + struct datapath *dp; + u16 port_no; + u32 upcall_pid; +}; + +/** + * struct vport_ops - definition of a type of virtual port + * + * @type: %OVS_VPORT_TYPE_* value for this type of virtual port. + * @create: Create a new vport configured as specified. On success returns + * a new vport allocated with ovs_vport_alloc(), otherwise an ERR_PTR() value. + * @destroy: Destroys a vport. Must call vport_free() on the vport but not + * before an RCU grace period has elapsed. + * @set_options: Modify the configuration of an existing vport. May be %NULL + * if modification is not supported. + * @get_options: Appends vport-specific attributes for the configuration of an + * existing vport to a &struct sk_buff. May be %NULL for a vport that does not + * have any configuration. + * @get_name: Get the device's name. + * @get_config: Get the device's configuration. + * @get_ifindex: Get the system interface index associated with the device. + * May be null if the device does not have an ifindex. + * @send: Send a packet on the device. Returns the length of the packet sent. + */ +struct vport_ops { + enum ovs_vport_type type; + + /* Called with RTNL lock. */ + struct vport *(*create)(const struct vport_parms *); + void (*destroy)(struct vport *); + + int (*set_options)(struct vport *, struct nlattr *); + int (*get_options)(const struct vport *, struct sk_buff *); + + /* Called with rcu_read_lock or RTNL lock. */ + const char *(*get_name)(const struct vport *); + void (*get_config)(const struct vport *, void *); + int (*get_ifindex)(const struct vport *); + + int (*send)(struct vport *, struct sk_buff *); +}; + +enum vport_err_type { + VPORT_E_RX_DROPPED, + VPORT_E_RX_ERROR, + VPORT_E_TX_DROPPED, + VPORT_E_TX_ERROR, +}; + +struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, + const struct vport_parms *); +void ovs_vport_free(struct vport *); + +#define VPORT_ALIGN 8 + +/** + * vport_priv - access private data area of vport + * + * @vport: vport to access + * + * If a nonzero size was passed in priv_size of vport_alloc() a private data + * area was allocated on creation. This allows that area to be accessed and + * used for any purpose needed by the vport implementer. + */ +static inline void *vport_priv(const struct vport *vport) +{ + return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); +} + +/** + * vport_from_priv - lookup vport from private data pointer + * + * @priv: Start of private data area. + * + * It is sometimes useful to translate from a pointer to the private data + * area to the vport, such as in the case where the private data pointer is + * the result of a hash table lookup. @priv must point to the start of the + * private data area. + */ +static inline struct vport *vport_from_priv(const void *priv) +{ + return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); +} + +void ovs_vport_receive(struct vport *, struct sk_buff *); +void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); + +/* List of statically compiled vport implementations. Don't forget to also + * add yours to the list at the top of vport.c. */ +extern const struct vport_ops ovs_netdev_vport_ops; +extern const struct vport_ops ovs_internal_vport_ops; + +#endif /* vport.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fabb4fafa281..2dbb32b988c4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -40,6 +40,10 @@ * byte arrays at the end of sockaddr_ll * and packet_mreq. * Johann Baudy : Added TX RING. + * Chetan Loke : Implemented TPACKET_V3 block abstraction + * layer. + * Copyright (C) 2011, <lokec@ccs.neu.edu> + * * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -161,9 +165,56 @@ struct packet_mreq_max { unsigned char mr_address[MAX_ADDR_LEN]; }; -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, +static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); + +#define V3_ALIGNMENT (8) + +#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) + +#define BLK_PLUS_PRIV(sz_of_priv) \ + (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) + +/* kbdq - kernel block descriptor queue */ +struct tpacket_kbdq_core { + struct pgv *pkbdq; + unsigned int feature_req_word; + unsigned int hdrlen; + unsigned char reset_pending_on_curr_blk; + unsigned char delete_blk_timer; + unsigned short kactive_blk_num; + unsigned short blk_sizeof_priv; + + /* last_kactive_blk_num: + * trick to see if user-space has caught up + * in order to avoid refreshing timer when every single pkt arrives. + */ + unsigned short last_kactive_blk_num; + + char *pkblk_start; + char *pkblk_end; + int kblk_size; + unsigned int knum_blocks; + uint64_t knxt_seq_num; + char *prev; + char *nxt_offset; + struct sk_buff *skb; + + atomic_t blk_fill_in_prog; + + /* Default is set to 8ms */ +#define DEFAULT_PRB_RETIRE_TOV (8) + + unsigned short retire_blk_tov; + unsigned short version; + unsigned long tov_in_jiffies; + + /* timer to retire an outstanding block */ + struct timer_list retire_blk_timer; +}; + +#define PGV_FROM_VMALLOC 1 struct pgv { char *buffer; }; @@ -179,12 +230,44 @@ struct packet_ring_buffer { unsigned int pg_vec_pages; unsigned int pg_vec_len; + struct tpacket_kbdq_core prb_bdqc; atomic_t pending; }; +#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) +#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) +#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) +#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) +#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) +#define BLOCK_O2PRIV(x) ((x)->offset_to_priv) +#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) + struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); +static void *packet_previous_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status); +static void packet_increment_head(struct packet_ring_buffer *buff); +static int prb_curr_blk_in_use(struct tpacket_kbdq_core *, + struct tpacket_block_desc *); +static void *prb_dispatch_next_block(struct tpacket_kbdq_core *, + struct packet_sock *); +static void prb_retire_current_block(struct tpacket_kbdq_core *, + struct packet_sock *, unsigned int status); +static int prb_queue_frozen(struct tpacket_kbdq_core *); +static void prb_open_block(struct tpacket_kbdq_core *, + struct tpacket_block_desc *); +static void prb_retire_rx_blk_timer_expired(unsigned long); +static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); +static void prb_init_blk_timer(struct packet_sock *, + struct tpacket_kbdq_core *, + void (*func) (unsigned long)); +static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); +static void prb_clear_rxhash(struct tpacket_kbdq_core *, + struct tpacket3_hdr *); +static void prb_fill_vlan_info(struct tpacket_kbdq_core *, + struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); struct packet_fanout; @@ -193,6 +276,7 @@ struct packet_sock { struct sock sk; struct packet_fanout *fanout; struct tpacket_stats stats; + union tpacket_stats_u stats_u; struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; int copy_thresh; @@ -242,7 +326,16 @@ struct packet_skb_cb { #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) -static inline struct packet_sock *pkt_sk(struct sock *sk) +#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) +#define GET_PBLOCK_DESC(x, bid) \ + ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer)) +#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \ + ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer)) +#define GET_NEXT_PRB_BLK_NUM(x) \ + (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \ + ((x)->kactive_blk_num+1) : 0) + +static struct packet_sock *pkt_sk(struct sock *sk) { return (struct packet_sock *)sk; } @@ -325,8 +418,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) h.h2->tp_status = status; flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; + case TPACKET_V3: default: - pr_err("TPACKET version not supported\n"); + WARN(1, "TPACKET version not supported.\n"); BUG(); } @@ -351,8 +445,9 @@ static int __packet_get_status(struct packet_sock *po, void *frame) case TPACKET_V2: flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return h.h2->tp_status; + case TPACKET_V3: default: - pr_err("TPACKET version not supported\n"); + WARN(1, "TPACKET version not supported.\n"); BUG(); return 0; } @@ -382,14 +477,678 @@ static void *packet_lookup_frame(struct packet_sock *po, return h.raw; } -static inline void *packet_current_frame(struct packet_sock *po, +static void *packet_current_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { return packet_lookup_frame(po, rb, rb->head, status); } -static inline void *packet_previous_frame(struct packet_sock *po, +static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) +{ + del_timer_sync(&pkc->retire_blk_timer); +} + +static void prb_shutdown_retire_blk_timer(struct packet_sock *po, + int tx_ring, + struct sk_buff_head *rb_queue) +{ + struct tpacket_kbdq_core *pkc; + + pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + + spin_lock(&rb_queue->lock); + pkc->delete_blk_timer = 1; + spin_unlock(&rb_queue->lock); + + prb_del_retire_blk_timer(pkc); +} + +static void prb_init_blk_timer(struct packet_sock *po, + struct tpacket_kbdq_core *pkc, + void (*func) (unsigned long)) +{ + init_timer(&pkc->retire_blk_timer); + pkc->retire_blk_timer.data = (long)po; + pkc->retire_blk_timer.function = func; + pkc->retire_blk_timer.expires = jiffies; +} + +static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring) +{ + struct tpacket_kbdq_core *pkc; + + if (tx_ring) + BUG(); + + pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); +} + +static int prb_calc_retire_blk_tmo(struct packet_sock *po, + int blk_size_in_bytes) +{ + struct net_device *dev; + unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; + struct ethtool_cmd ecmd; + int err; + + rtnl_lock(); + dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); + if (unlikely(!dev)) { + rtnl_unlock(); + return DEFAULT_PRB_RETIRE_TOV; + } + err = __ethtool_get_settings(dev, &ecmd); + rtnl_unlock(); + if (!err) { + switch (ecmd.speed) { + case SPEED_10000: + msec = 1; + div = 10000/1000; + break; + case SPEED_1000: + msec = 1; + div = 1000/1000; + break; + /* + * If the link speed is so slow you don't really + * need to worry about perf anyways + */ + case SPEED_100: + case SPEED_10: + default: + return DEFAULT_PRB_RETIRE_TOV; + } + } + + mbits = (blk_size_in_bytes * 8) / (1024 * 1024); + + if (div) + mbits /= div; + + tmo = mbits * msec; + + if (div) + return tmo+1; + return tmo; +} + +static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, + union tpacket_req_u *req_u) +{ + p1->feature_req_word = req_u->req3.tp_feature_req_word; +} + +static void init_prb_bdqc(struct packet_sock *po, + struct packet_ring_buffer *rb, + struct pgv *pg_vec, + union tpacket_req_u *req_u, int tx_ring) +{ + struct tpacket_kbdq_core *p1 = &rb->prb_bdqc; + struct tpacket_block_desc *pbd; + + memset(p1, 0x0, sizeof(*p1)); + + p1->knxt_seq_num = 1; + p1->pkbdq = pg_vec; + pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; + p1->pkblk_start = (char *)pg_vec[0].buffer; + p1->kblk_size = req_u->req3.tp_block_size; + p1->knum_blocks = req_u->req3.tp_block_nr; + p1->hdrlen = po->tp_hdrlen; + p1->version = po->tp_version; + p1->last_kactive_blk_num = 0; + po->stats_u.stats3.tp_freeze_q_cnt = 0; + if (req_u->req3.tp_retire_blk_tov) + p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; + else + p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, + req_u->req3.tp_block_size); + p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); + p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; + + prb_init_ft_ops(p1, req_u); + prb_setup_retire_blk_timer(po, tx_ring); + prb_open_block(p1, pbd); +} + +/* Do NOT update the last_blk_num first. + * Assumes sk_buff_head lock is held. + */ +static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) +{ + mod_timer(&pkc->retire_blk_timer, + jiffies + pkc->tov_in_jiffies); + pkc->last_kactive_blk_num = pkc->kactive_blk_num; +} + +/* + * Timer logic: + * 1) We refresh the timer only when we open a block. + * By doing this we don't waste cycles refreshing the timer + * on packet-by-packet basis. + * + * With a 1MB block-size, on a 1Gbps line, it will take + * i) ~8 ms to fill a block + ii) memcpy etc. + * In this cut we are not accounting for the memcpy time. + * + * So, if the user sets the 'tmo' to 10ms then the timer + * will never fire while the block is still getting filled + * (which is what we want). However, the user could choose + * to close a block early and that's fine. + * + * But when the timer does fire, we check whether or not to refresh it. + * Since the tmo granularity is in msecs, it is not too expensive + * to refresh the timer, lets say every '8' msecs. + * Either the user can set the 'tmo' or we can derive it based on + * a) line-speed and b) block-size. + * prb_calc_retire_blk_tmo() calculates the tmo. + * + */ +static void prb_retire_rx_blk_timer_expired(unsigned long data) +{ + struct packet_sock *po = (struct packet_sock *)data; + struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc; + unsigned int frozen; + struct tpacket_block_desc *pbd; + + spin_lock(&po->sk.sk_receive_queue.lock); + + frozen = prb_queue_frozen(pkc); + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + if (unlikely(pkc->delete_blk_timer)) + goto out; + + /* We only need to plug the race when the block is partially filled. + * tpacket_rcv: + * lock(); increment BLOCK_NUM_PKTS; unlock() + * copy_bits() is in progress ... + * timer fires on other cpu: + * we can't retire the current block because copy_bits + * is in progress. + * + */ + if (BLOCK_NUM_PKTS(pbd)) { + while (atomic_read(&pkc->blk_fill_in_prog)) { + /* Waiting for skb_copy_bits to finish... */ + cpu_relax(); + } + } + + if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { + if (!frozen) { + prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); + if (!prb_dispatch_next_block(pkc, po)) + goto refresh_timer; + else + goto out; + } else { + /* Case 1. Queue was frozen because user-space was + * lagging behind. + */ + if (prb_curr_blk_in_use(pkc, pbd)) { + /* + * Ok, user-space is still behind. + * So just refresh the timer. + */ + goto refresh_timer; + } else { + /* Case 2. queue was frozen,user-space caught up, + * now the link went idle && the timer fired. + * We don't have a block to close.So we open this + * block and restart the timer. + * opening a block thaws the queue,restarts timer + * Thawing/timer-refresh is a side effect. + */ + prb_open_block(pkc, pbd); + goto out; + } + } + } + +refresh_timer: + _prb_refresh_rx_retire_blk_timer(pkc); + +out: + spin_unlock(&po->sk.sk_receive_queue.lock); +} + +static void prb_flush_block(struct tpacket_kbdq_core *pkc1, + struct tpacket_block_desc *pbd1, __u32 status) +{ + /* Flush everything minus the block header */ + +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 + u8 *start, *end; + + start = (u8 *)pbd1; + + /* Skip the block header(we know header WILL fit in 4K) */ + start += PAGE_SIZE; + + end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); + for (; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); + + smp_wmb(); +#endif + + /* Now update the block status. */ + + BLOCK_STATUS(pbd1) = status; + + /* Flush the block header */ + +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 + start = (u8 *)pbd1; + flush_dcache_page(pgv_to_page(start)); + + smp_wmb(); +#endif +} + +/* + * Side effect: + * + * 1) flush the block + * 2) Increment active_blk_num + * + * Note:We DONT refresh the timer on purpose. + * Because almost always the next block will be opened. + */ +static void prb_close_block(struct tpacket_kbdq_core *pkc1, + struct tpacket_block_desc *pbd1, + struct packet_sock *po, unsigned int stat) +{ + __u32 status = TP_STATUS_USER | stat; + + struct tpacket3_hdr *last_pkt; + struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; + + if (po->stats.tp_drops) + status |= TP_STATUS_LOSING; + + last_pkt = (struct tpacket3_hdr *)pkc1->prev; + last_pkt->tp_next_offset = 0; + + /* Get the ts of the last pkt */ + if (BLOCK_NUM_PKTS(pbd1)) { + h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; + h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; + } else { + /* Ok, we tmo'd - so get the current time */ + struct timespec ts; + getnstimeofday(&ts); + h1->ts_last_pkt.ts_sec = ts.tv_sec; + h1->ts_last_pkt.ts_nsec = ts.tv_nsec; + } + + smp_wmb(); + + /* Flush the block */ + prb_flush_block(pkc1, pbd1, status); + + pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); +} + +static void prb_thaw_queue(struct tpacket_kbdq_core *pkc) +{ + pkc->reset_pending_on_curr_blk = 0; +} + +/* + * Side effect of opening a block: + * + * 1) prb_queue is thawed. + * 2) retire_blk_timer is refreshed. + * + */ +static void prb_open_block(struct tpacket_kbdq_core *pkc1, + struct tpacket_block_desc *pbd1) +{ + struct timespec ts; + struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; + + smp_rmb(); + + if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { + + /* We could have just memset this but we will lose the + * flexibility of making the priv area sticky + */ + BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; + BLOCK_NUM_PKTS(pbd1) = 0; + BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + getnstimeofday(&ts); + h1->ts_first_pkt.ts_sec = ts.tv_sec; + h1->ts_first_pkt.ts_nsec = ts.tv_nsec; + pkc1->pkblk_start = (char *)pbd1; + pkc1->nxt_offset = (char *)(pkc1->pkblk_start + + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv)); + BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; + pbd1->version = pkc1->version; + pkc1->prev = pkc1->nxt_offset; + pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; + prb_thaw_queue(pkc1); + _prb_refresh_rx_retire_blk_timer(pkc1); + + smp_wmb(); + + return; + } + + WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", + pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); + dump_stack(); + BUG(); +} + +/* + * Queue freeze logic: + * 1) Assume tp_block_nr = 8 blocks. + * 2) At time 't0', user opens Rx ring. + * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7 + * 4) user-space is either sleeping or processing block '0'. + * 5) tpacket_rcv is currently filling block '7', since there is no space left, + * it will close block-7,loop around and try to fill block '0'. + * call-flow: + * __packet_lookup_frame_in_block + * prb_retire_current_block() + * prb_dispatch_next_block() + * |->(BLOCK_STATUS == USER) evaluates to true + * 5.1) Since block-0 is currently in-use, we just freeze the queue. + * 6) Now there are two cases: + * 6.1) Link goes idle right after the queue is frozen. + * But remember, the last open_block() refreshed the timer. + * When this timer expires,it will refresh itself so that we can + * re-open block-0 in near future. + * 6.2) Link is busy and keeps on receiving packets. This is a simple + * case and __packet_lookup_frame_in_block will check if block-0 + * is free and can now be re-used. + */ +static void prb_freeze_queue(struct tpacket_kbdq_core *pkc, + struct packet_sock *po) +{ + pkc->reset_pending_on_curr_blk = 1; + po->stats_u.stats3.tp_freeze_q_cnt++; +} + +#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) + +/* + * If the next block is free then we will dispatch it + * and return a good offset. + * Else, we will freeze the queue. + * So, caller must check the return value. + */ +static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc, + struct packet_sock *po) +{ + struct tpacket_block_desc *pbd; + + smp_rmb(); + + /* 1. Get current block num */ + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + /* 2. If this block is currently in_use then freeze the queue */ + if (TP_STATUS_USER & BLOCK_STATUS(pbd)) { + prb_freeze_queue(pkc, po); + return NULL; + } + + /* + * 3. + * open this block and return the offset where the first packet + * needs to get stored. + */ + prb_open_block(pkc, pbd); + return (void *)pkc->nxt_offset; +} + +static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, + struct packet_sock *po, unsigned int status) +{ + struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + /* retire/close the current block */ + if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) { + /* + * Plug the case where copy_bits() is in progress on + * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't + * have space to copy the pkt in the current block and + * called prb_retire_current_block() + * + * We don't need to worry about the TMO case because + * the timer-handler already handled this case. + */ + if (!(status & TP_STATUS_BLK_TMO)) { + while (atomic_read(&pkc->blk_fill_in_prog)) { + /* Waiting for skb_copy_bits to finish... */ + cpu_relax(); + } + } + prb_close_block(pkc, pbd, po, status); + return; + } + + WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd); + dump_stack(); + BUG(); +} + +static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, + struct tpacket_block_desc *pbd) +{ + return TP_STATUS_USER & BLOCK_STATUS(pbd); +} + +static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) +{ + return pkc->reset_pending_on_curr_blk; +} + +static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) +{ + struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); + atomic_dec(&pkc->blk_fill_in_prog); +} + +static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb); +} + +static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + ppd->hv1.tp_rxhash = 0; +} + +static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + if (vlan_tx_tag_present(pkc->skb)) { + ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); + ppd->tp_status = TP_STATUS_VLAN_VALID; + } else { + ppd->hv1.tp_vlan_tci = ppd->tp_status = 0; + } +} + +static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + prb_fill_vlan_info(pkc, ppd); + + if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) + prb_fill_rxhash(pkc, ppd); + else + prb_clear_rxhash(pkc, ppd); +} + +static void prb_fill_curr_block(char *curr, + struct tpacket_kbdq_core *pkc, + struct tpacket_block_desc *pbd, + unsigned int len) +{ + struct tpacket3_hdr *ppd; + + ppd = (struct tpacket3_hdr *)curr; + ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len); + pkc->prev = curr; + pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); + BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); + BLOCK_NUM_PKTS(pbd) += 1; + atomic_inc(&pkc->blk_fill_in_prog); + prb_run_all_ft_ops(pkc, ppd); +} + +/* Assumes caller has the sk->rx_queue.lock */ +static void *__packet_lookup_frame_in_block(struct packet_sock *po, + struct sk_buff *skb, + int status, + unsigned int len + ) +{ + struct tpacket_kbdq_core *pkc; + struct tpacket_block_desc *pbd; + char *curr, *end; + + pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring)); + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + /* Queue is frozen when user space is lagging behind */ + if (prb_queue_frozen(pkc)) { + /* + * Check if that last block which caused the queue to freeze, + * is still in_use by user-space. + */ + if (prb_curr_blk_in_use(pkc, pbd)) { + /* Can't record this packet */ + return NULL; + } else { + /* + * Ok, the block was released by user-space. + * Now let's open that block. + * opening a block also thaws the queue. + * Thawing is a side effect. + */ + prb_open_block(pkc, pbd); + } + } + + smp_mb(); + curr = pkc->nxt_offset; + pkc->skb = skb; + end = (char *) ((char *)pbd + pkc->kblk_size); + + /* first try the current block */ + if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) { + prb_fill_curr_block(curr, pkc, pbd, len); + return (void *)curr; + } + + /* Ok, close the current block */ + prb_retire_current_block(pkc, po, 0); + + /* Now, try to dispatch the next block */ + curr = (char *)prb_dispatch_next_block(pkc, po); + if (curr) { + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + prb_fill_curr_block(curr, pkc, pbd, len); + return (void *)curr; + } + + /* + * No free blocks are available.user_space hasn't caught up yet. + * Queue was just frozen and now this packet will get dropped. + */ + return NULL; +} + +static void *packet_current_rx_frame(struct packet_sock *po, + struct sk_buff *skb, + int status, unsigned int len) +{ + char *curr = NULL; + switch (po->tp_version) { + case TPACKET_V1: + case TPACKET_V2: + curr = packet_lookup_frame(po, &po->rx_ring, + po->rx_ring.head, status); + return curr; + case TPACKET_V3: + return __packet_lookup_frame_in_block(po, skb, status, len); + default: + WARN(1, "TPACKET version not supported\n"); + BUG(); + return 0; + } +} + +static void *prb_lookup_block(struct packet_sock *po, + struct packet_ring_buffer *rb, + unsigned int previous, + int status) +{ + struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); + struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); + + if (status != BLOCK_STATUS(pbd)) + return NULL; + return pbd; +} + +static int prb_previous_blk_num(struct packet_ring_buffer *rb) +{ + unsigned int prev; + if (rb->prb_bdqc.kactive_blk_num) + prev = rb->prb_bdqc.kactive_blk_num-1; + else + prev = rb->prb_bdqc.knum_blocks-1; + return prev; +} + +/* Assumes caller has held the rx_queue.lock */ +static void *__prb_previous_block(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status) +{ + unsigned int previous = prb_previous_blk_num(rb); + return prb_lookup_block(po, rb, previous, status); +} + +static void *packet_previous_rx_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status) +{ + if (po->tp_version <= TPACKET_V2) + return packet_previous_frame(po, rb, status); + + return __prb_previous_block(po, rb, status); +} + +static void packet_increment_rx_head(struct packet_sock *po, + struct packet_ring_buffer *rb) +{ + switch (po->tp_version) { + case TPACKET_V1: + case TPACKET_V2: + return packet_increment_head(rb); + case TPACKET_V3: + default: + WARN(1, "TPACKET version not supported.\n"); + BUG(); + return; + } +} + +static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) { @@ -397,7 +1156,7 @@ static inline void *packet_previous_frame(struct packet_sock *po, return packet_lookup_frame(po, rb, previous, status); } -static inline void packet_increment_head(struct packet_ring_buffer *buff) +static void packet_increment_head(struct packet_ring_buffer *buff) { buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; } @@ -454,43 +1213,6 @@ static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *sk return f->arr[cpu % num]; } -static struct sk_buff *fanout_check_defrag(struct sk_buff *skb) -{ -#ifdef CONFIG_INET - const struct iphdr *iph; - u32 len; - - if (skb->protocol != htons(ETH_P_IP)) - return skb; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return skb; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return skb; - if (!pskb_may_pull(skb, iph->ihl*4)) - return skb; - iph = ip_hdr(skb); - len = ntohs(iph->tot_len); - if (skb->len < len || len < (iph->ihl * 4)) - return skb; - - if (ip_is_fragment(ip_hdr(skb))) { - skb = skb_share_check(skb, GFP_ATOMIC); - if (skb) { - if (pskb_trim_rcsum(skb, len)) - return skb; - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - if (ip_defrag(skb, IP_DEFRAG_AF_PACKET)) - return NULL; - skb->rxhash = 0; - } - } -#endif - return skb; -} - static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { @@ -509,7 +1231,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, case PACKET_FANOUT_HASH: default: if (f->defrag) { - skb = fanout_check_defrag(skb); + skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } @@ -777,10 +1499,11 @@ retry: if (!skb) { size_t reserved = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0; rcu_read_unlock(); - skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL); + skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL); if (skb == NULL) return -ENOBUFS; /* FIXME: Save some space for broken drivers that write a hard @@ -836,7 +1559,7 @@ out_free: return err; } -static inline unsigned int run_filter(const struct sk_buff *skb, +static unsigned int run_filter(const struct sk_buff *skb, const struct sock *sk, unsigned int res) { @@ -908,8 +1631,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, if (snaplen > res) snaplen = res; - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned)sk->sk_rcvbuf) + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto drop_n_acct; if (skb_shared(skb)) { @@ -985,12 +1707,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, union { struct tpacket_hdr *h1; struct tpacket2_hdr *h2; + struct tpacket3_hdr *h3; void *raw; } h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; - unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; + unsigned long status = TP_STATUS_USER; unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; struct timeval tv; @@ -1036,37 +1759,45 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, po->tp_reserve; macoff = netoff - maclen; } - - if (macoff + snaplen > po->rx_ring.frame_size) { - if (po->copy_thresh && - atomic_read(&sk->sk_rmem_alloc) + skb->truesize < - (unsigned)sk->sk_rcvbuf) { - if (skb_shared(skb)) { - copy_skb = skb_clone(skb, GFP_ATOMIC); - } else { - copy_skb = skb_get(skb); - skb_head = skb->data; + if (po->tp_version <= TPACKET_V2) { + if (macoff + snaplen > po->rx_ring.frame_size) { + if (po->copy_thresh && + atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { + if (skb_shared(skb)) { + copy_skb = skb_clone(skb, GFP_ATOMIC); + } else { + copy_skb = skb_get(skb); + skb_head = skb->data; + } + if (copy_skb) + skb_set_owner_r(copy_skb, sk); } - if (copy_skb) - skb_set_owner_r(copy_skb, sk); + snaplen = po->rx_ring.frame_size - macoff; + if ((int)snaplen < 0) + snaplen = 0; } - snaplen = po->rx_ring.frame_size - macoff; - if ((int)snaplen < 0) - snaplen = 0; } - spin_lock(&sk->sk_receive_queue.lock); - h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL); + h.raw = packet_current_rx_frame(po, skb, + TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto ring_is_full; - packet_increment_head(&po->rx_ring); + if (po->tp_version <= TPACKET_V2) { + packet_increment_rx_head(po, &po->rx_ring); + /* + * LOSING will be reported till you read the stats, + * because it's COR - Clear On Read. + * Anyways, moving it for V1/V2 only as V3 doesn't need this + * at packet level. + */ + if (po->stats.tp_drops) + status |= TP_STATUS_LOSING; + } po->stats.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; __skb_queue_tail(&sk->sk_receive_queue, copy_skb); } - if (!po->stats.tp_drops) - status &= ~TP_STATUS_LOSING; spin_unlock(&sk->sk_receive_queue.lock); skb_copy_bits(skb, 0, h.raw + macoff, snaplen); @@ -1117,6 +1848,29 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; + case TPACKET_V3: + /* tp_nxt_offset,vlan are already populated above. + * So DONT clear those fields here + */ + h.h3->tp_status |= status; + h.h3->tp_len = skb->len; + h.h3->tp_snaplen = snaplen; + h.h3->tp_mac = macoff; + h.h3->tp_net = netoff; + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + ts = ktime_to_timespec(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + ts = ktime_to_timespec(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) + ts = ktime_to_timespec(skb->tstamp); + else + getnstimeofday(&ts); + h.h3->tp_sec = ts.tv_sec; + h.h3->tp_nsec = ts.tv_nsec; + hdrlen = sizeof(*h.h3); + break; default: BUG(); } @@ -1137,13 +1891,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, { u8 *start, *end; - end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); - for (start = h.raw; start < end; start += PAGE_SIZE) - flush_dcache_page(pgv_to_page(start)); + if (po->tp_version <= TPACKET_V2) { + end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + + macoff + snaplen); + for (start = h.raw; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); + } smp_wmb(); } #endif - __packet_set_status(po, h.raw, status); + if (po->tp_version <= TPACKET_V2) + __packet_set_status(po, h.raw, status); + else + prb_clear_blk_fill_status(&po->rx_ring); sk->sk_data_ready(sk, 0); @@ -1170,8 +1930,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) struct packet_sock *po = pkt_sk(skb->sk); void *ph; - BUG_ON(skb == NULL); - if (likely(po->tx_ring.pg_vec)) { ph = skb_shinfo(skb)->destructor_arg; BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); @@ -1185,7 +1943,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, int size_max, - __be16 proto, unsigned char *addr) + __be16 proto, unsigned char *addr, int hlen) { union { struct tpacket_hdr *h1; @@ -1219,7 +1977,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return -EMSGSIZE; } - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); @@ -1294,6 +2052,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) unsigned char *addr; int len_sum = 0; int status = 0; + int hlen, tlen; mutex_lock(&po->pg_vec_lock); @@ -1342,16 +2101,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } status = TP_STATUS_SEND_REQUEST; + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(&po->sk, - LL_ALLOCATED_SPACE(dev) - + sizeof(struct sockaddr_ll), + hlen + tlen + sizeof(struct sockaddr_ll), 0, &err); if (unlikely(skb == NULL)) goto out_status; tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, - addr); + addr, hlen); if (unlikely(tp_len < 0)) { if (po->tp_loss) { @@ -1408,10 +2168,10 @@ out: return err; } -static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, - size_t reserve, size_t len, - size_t linear, int noblock, - int *err) +static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, + size_t reserve, size_t len, + size_t linear, int noblock, + int *err) { struct sk_buff *skb; @@ -1448,6 +2208,7 @@ static int packet_snd(struct socket *sock, int vnet_hdr_len; struct packet_sock *po = pkt_sk(sk); unsigned short gso_type = 0; + int hlen, tlen; /* * Get and verify the address. @@ -1532,8 +2293,9 @@ static int packet_snd(struct socket *sock, goto out_unlock; err = -ENOBUFS; - skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev), - LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len, + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, vnet_hdr.hdr_len, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; @@ -1634,7 +2396,7 @@ static int packet_release(struct socket *sock) struct sock *sk = sock->sk; struct packet_sock *po; struct net *net; - struct tpacket_req req; + union tpacket_req_u req_u; if (!sk) return 0; @@ -1657,13 +2419,13 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); - memset(&req, 0, sizeof(req)); + memset(&req_u, 0, sizeof(req_u)); if (po->rx_ring.pg_vec) - packet_set_ring(sk, &req, 1, 0); + packet_set_ring(sk, &req_u, 1, 0); if (po->tx_ring.pg_vec) - packet_set_ring(sk, &req, 1, 1); + packet_set_ring(sk, &req_u, 1, 1); fanout_release(sk); @@ -1691,8 +2453,12 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc { struct packet_sock *po = pkt_sk(sk); - if (po->fanout) + if (po->fanout) { + if (dev) + dev_put(dev); + return -EINVAL; + } lock_sock(sk); @@ -2283,15 +3049,27 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv case PACKET_RX_RING: case PACKET_TX_RING: { - struct tpacket_req req; + union tpacket_req_u req_u; + int len; - if (optlen < sizeof(req)) + switch (po->tp_version) { + case TPACKET_V1: + case TPACKET_V2: + len = sizeof(req_u.req); + break; + case TPACKET_V3: + default: + len = sizeof(req_u.req3); + break; + } + if (optlen < len) return -EINVAL; if (pkt_sk(sk)->has_vnet_hdr) return -EINVAL; - if (copy_from_user(&req, optval, sizeof(req))) + if (copy_from_user(&req_u.req, optval, len)) return -EFAULT; - return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); + return packet_set_ring(sk, &req_u, 0, + optname == PACKET_TX_RING); } case PACKET_COPY_THRESH: { @@ -2318,6 +3096,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv switch (val) { case TPACKET_V1: case TPACKET_V2: + case TPACKET_V3: po->tp_version = val; return 0; default: @@ -2427,6 +3206,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, struct packet_sock *po = pkt_sk(sk); void *data; struct tpacket_stats st; + union tpacket_stats_u st_u; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -2439,15 +3219,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case PACKET_STATISTICS: - if (len > sizeof(struct tpacket_stats)) - len = sizeof(struct tpacket_stats); + if (po->tp_version == TPACKET_V3) { + len = sizeof(struct tpacket_stats_v3); + } else { + if (len > sizeof(struct tpacket_stats)) + len = sizeof(struct tpacket_stats); + } spin_lock_bh(&sk->sk_receive_queue.lock); - st = po->stats; + if (po->tp_version == TPACKET_V3) { + memcpy(&st_u.stats3, &po->stats, + sizeof(struct tpacket_stats)); + st_u.stats3.tp_freeze_q_cnt = + po->stats_u.stats3.tp_freeze_q_cnt; + st_u.stats3.tp_packets += po->stats.tp_drops; + data = &st_u.stats3; + } else { + st = po->stats; + st.tp_packets += st.tp_drops; + data = &st; + } memset(&po->stats, 0, sizeof(st)); spin_unlock_bh(&sk->sk_receive_queue.lock); - st.tp_packets += st.tp_drops; - - data = &st; break; case PACKET_AUXDATA: if (len > sizeof(int)) @@ -2488,6 +3280,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case TPACKET_V2: val = sizeof(struct tpacket2_hdr); break; + case TPACKET_V3: + val = sizeof(struct tpacket3_hdr); + break; default: return -EINVAL; } @@ -2644,7 +3439,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock, spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { - if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) + if (!packet_previous_rx_frame(po, &po->rx_ring, + TP_STATUS_KERNEL)) mask |= POLLIN | POLLRDNORM; } spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -2705,7 +3501,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order, kfree(pg_vec); } -static inline char *alloc_one_pg_vec_page(unsigned long order) +static char *alloc_one_pg_vec_page(unsigned long order) { char *buffer = NULL; gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | @@ -2763,7 +3559,7 @@ out_free_pgvec: goto out; } -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, +static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring) { struct pgv *pg_vec = NULL; @@ -2772,7 +3568,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; __be16 num; - int err; + int err = -EINVAL; + /* Added to avoid minimal code churn */ + struct tpacket_req *req = &req_u->req; + + /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ + if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { + WARN(1, "Tx-ring is not supported.\n"); + goto out; + } rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; @@ -2798,6 +3602,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, case TPACKET_V2: po->tp_hdrlen = TPACKET2_HDRLEN; break; + case TPACKET_V3: + po->tp_hdrlen = TPACKET3_HDRLEN; + break; } err = -EINVAL; @@ -2823,6 +3630,17 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, pg_vec = alloc_pg_vec(req, order); if (unlikely(!pg_vec)) goto out; + switch (po->tp_version) { + case TPACKET_V3: + /* Transmit path is not supported. We checked + * it above but just being paranoid + */ + if (!tx_ring) + init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring); + break; + default: + break; + } } /* Done */ else { @@ -2875,7 +3693,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, register_prot_hook(sk); } spin_unlock(&po->bind_lock); - + if (closing && (po->tp_version > TPACKET_V2)) { + /* Because we don't support block-based V3 on tx-ring */ + if (!tx_ring) + prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue); + } release_sock(sk); if (pg_vec) diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index c6fffd946d42..d65f699fbf34 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -491,7 +491,7 @@ void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp) { mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[protocol] != pp); - rcu_assign_pointer(proto_tab[protocol], NULL); + RCU_INIT_POINTER(proto_tab[protocol], NULL); mutex_unlock(&proto_tab_lock); synchronize_rcu(); proto_unregister(pp->prot); diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 2f032381bd45..bf35b4e1a14c 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -30,6 +30,7 @@ #include <net/sock.h> #include <linux/phonet.h> +#include <linux/export.h> #include <net/phonet/phonet.h> static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb); diff --git a/net/phonet/pep.c b/net/phonet/pep.c index f17fd841f948..9f60008740e3 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -30,6 +30,7 @@ #include <asm/ioctls.h> #include <linux/phonet.h> +#include <linux/module.h> #include <net/phonet/phonet.h> #include <net/phonet/pep.h> #include <net/phonet/gprs.h> @@ -533,6 +534,29 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) return pipe_handler_send_created_ind(sk); } +static int pep_enableresp_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pnpipehdr *hdr = pnp_hdr(skb); + + if (hdr->error_code != PN_PIPE_NO_ERROR) + return -ECONNREFUSED; + + return pep_indicate(sk, PNS_PIPE_ENABLED_IND, 0 /* sub-blocks */, + NULL, 0, GFP_ATOMIC); + +} + +static void pipe_start_flow_control(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + if (!pn_flow_safe(pn->tx_fc)) { + atomic_set(&pn->tx_credits, 1); + sk->sk_write_space(sk); + } + pipe_grant_credits(sk, GFP_ATOMIC); +} + /* Queue an skb to an actively connected sock. * Socket lock must be held. */ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb) @@ -578,13 +602,25 @@ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_state = TCP_CLOSE_WAIT; break; } + if (pn->init_enable == PN_PIPE_DISABLE) + sk->sk_state = TCP_SYN_RECV; + else { + sk->sk_state = TCP_ESTABLISHED; + pipe_start_flow_control(sk); + } + break; - sk->sk_state = TCP_ESTABLISHED; - if (!pn_flow_safe(pn->tx_fc)) { - atomic_set(&pn->tx_credits, 1); - sk->sk_write_space(sk); + case PNS_PEP_ENABLE_RESP: + if (sk->sk_state != TCP_SYN_SENT) + break; + + if (pep_enableresp_rcv(sk, skb)) { + sk->sk_state = TCP_CLOSE_WAIT; + break; } - pipe_grant_credits(sk, GFP_ATOMIC); + + sk->sk_state = TCP_ESTABLISHED; + pipe_start_flow_control(sk); break; case PNS_PEP_DISCONNECT_RESP: @@ -863,14 +899,32 @@ static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) int err; u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD }; - pn->pipe_handle = 1; /* anything but INVALID_HANDLE */ + if (pn->pipe_handle == PN_PIPE_INVALID_HANDLE) + pn->pipe_handle = 1; /* anything but INVALID_HANDLE */ + err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ, - PN_PIPE_ENABLE, data, 4); + pn->init_enable, data, 4); if (err) { pn->pipe_handle = PN_PIPE_INVALID_HANDLE; return err; } + sk->sk_state = TCP_SYN_SENT; + + return 0; +} + +static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) +{ + int err; + + err = pipe_handler_request(sk, PNS_PEP_ENABLE_REQ, PAD, + NULL, 0); + if (err) + return err; + + sk->sk_state = TCP_SYN_SENT; + return 0; } @@ -878,11 +932,14 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { struct pep_sock *pn = pep_sk(sk); int answ; + int ret = -ENOIOCTLCMD; switch (cmd) { case SIOCINQ: - if (sk->sk_state == TCP_LISTEN) - return -EINVAL; + if (sk->sk_state == TCP_LISTEN) { + ret = -EINVAL; + break; + } lock_sock(sk); if (sock_flag(sk, SOCK_URGINLINE) && @@ -893,10 +950,22 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) else answ = 0; release_sock(sk); - return put_user(answ, (int __user *)arg); + ret = put_user(answ, (int __user *)arg); + break; + + case SIOCPNENABLEPIPE: + lock_sock(sk); + if (sk->sk_state == TCP_SYN_SENT) + ret = -EBUSY; + else if (sk->sk_state == TCP_ESTABLISHED) + ret = -EISCONN; + else + ret = pep_sock_enable(sk, NULL, 0); + release_sock(sk); + break; } - return -ENOIOCTLCMD; + return ret; } static int pep_init(struct sock *sk) @@ -959,6 +1028,18 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, } goto out_norel; + case PNPIPE_HANDLE: + if ((sk->sk_state == TCP_CLOSE) && + (val >= 0) && (val < PN_PIPE_INVALID_HANDLE)) + pn->pipe_handle = val; + else + err = -EINVAL; + break; + + case PNPIPE_INITSTATE: + pn->init_enable = !!val; + break; + default: err = -ENOPROTOOPT; } @@ -994,6 +1075,10 @@ static int pep_getsockopt(struct sock *sk, int level, int optname, return -EINVAL; break; + case PNPIPE_INITSTATE: + val = pn->init_enable; + break; + default: return -ENOPROTOOPT; } diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index d2df8f33160b..9b9a85ecc4c7 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -276,7 +276,7 @@ static void phonet_route_autodel(struct net_device *dev) mutex_lock(&pnn->routes.lock); for (i = 0; i < 64; i++) if (dev == pnn->routes.table[i]) { - rcu_assign_pointer(pnn->routes.table[i], NULL); + RCU_INIT_POINTER(pnn->routes.table[i], NULL); set_bit(i, deleted); } mutex_unlock(&pnn->routes.lock); @@ -406,7 +406,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr) daddr = daddr >> 2; mutex_lock(&routes->lock); if (dev == routes->table[daddr]) - rcu_assign_pointer(routes->table[daddr], NULL); + RCU_INIT_POINTER(routes->table[daddr], NULL); else dev = NULL; mutex_unlock(&routes->lock); diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ab07711cf2f4..4c7eff30dfa9 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -31,6 +31,7 @@ #include <net/tcp_states.h> #include <linux/phonet.h> +#include <linux/export.h> #include <net/phonet/phonet.h> #include <net/phonet/pep.h> #include <net/phonet/pn_dev.h> @@ -695,7 +696,7 @@ int pn_sock_unbind_res(struct sock *sk, u8 res) mutex_lock(&resource_mutex); if (pnres.sk[res] == sk) { - rcu_assign_pointer(pnres.sk[res], NULL); + RCU_INIT_POINTER(pnres.sk[res], NULL); ret = 0; } mutex_unlock(&resource_mutex); @@ -714,7 +715,7 @@ void pn_sock_unbind_all_res(struct sock *sk) mutex_lock(&resource_mutex); for (res = 0; res < 256; res++) { if (pnres.sk[res] == sk) { - rcu_assign_pointer(pnres.sk[res], NULL); + RCU_INIT_POINTER(pnres.sk[res], NULL); match++; } } diff --git a/net/rds/cong.c b/net/rds/cong.c index 6daaa49d133f..e5b65acd650b 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -34,6 +34,7 @@ #include <linux/types.h> #include <linux/rbtree.h> #include <linux/bitops.h> +#include <linux/export.h> #include "rds.h" diff --git a/net/rds/connection.c b/net/rds/connection.c index 9334d892366e..9e07c756d1f9 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/inet_hashtables.h> #include "rds.h" diff --git a/net/rds/ib.c b/net/rds/ib.c index 3b83086bcc30..b4c8b0022fee 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -38,6 +38,7 @@ #include <linux/if_arp.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/module.h> #include "rds.h" #include "ib.h" diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index cd67026be2d5..51c868923f64 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -375,23 +375,21 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto out; } - ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), + ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), ibdev_to_node(dev)); if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); goto out; } - memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); - ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), + ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), ibdev_to_node(dev)); if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); goto out; } - memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); rds_ib_recv_init_ack(ic); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 819c35a0d9cb..e8fdb172adbb 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -33,10 +33,10 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/rculist.h> +#include <linux/llist.h> #include "rds.h" #include "ib.h" -#include "xlist.h" static DEFINE_PER_CPU(unsigned long, clean_list_grace); #define CLEAN_LIST_BUSY_BIT 0 @@ -49,7 +49,7 @@ struct rds_ib_mr { struct rds_ib_mr_pool *pool; struct ib_fmr *fmr; - struct xlist_head xlist; + struct llist_node llnode; /* unmap_list is for freeing */ struct list_head unmap_list; @@ -71,9 +71,9 @@ struct rds_ib_mr_pool { atomic_t item_count; /* total # of MRs */ atomic_t dirty_count; /* # dirty of MRs */ - struct xlist_head drop_list; /* MRs that have reached their max_maps limit */ - struct xlist_head free_list; /* unused MRs */ - struct xlist_head clean_list; /* global unused & unamapped MRs */ + struct llist_head drop_list; /* MRs that have reached their max_maps limit */ + struct llist_head free_list; /* unused MRs */ + struct llist_head clean_list; /* global unused & unamapped MRs */ wait_queue_head_t flush_wait; atomic_t free_pinned; /* memory pinned by free MRs */ @@ -220,9 +220,9 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) if (!pool) return ERR_PTR(-ENOMEM); - INIT_XLIST_HEAD(&pool->free_list); - INIT_XLIST_HEAD(&pool->drop_list); - INIT_XLIST_HEAD(&pool->clean_list); + init_llist_head(&pool->free_list); + init_llist_head(&pool->drop_list); + init_llist_head(&pool->clean_list); mutex_init(&pool->flush_lock); init_waitqueue_head(&pool->flush_wait); INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); @@ -260,26 +260,18 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) kfree(pool); } -static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl, - struct rds_ib_mr **ibmr_ret) -{ - struct xlist_head *ibmr_xl; - ibmr_xl = xlist_del_head_fast(xl); - *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist); -} - static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; - struct xlist_head *ret; + struct llist_node *ret; unsigned long *flag; preempt_disable(); flag = &__get_cpu_var(clean_list_grace); set_bit(CLEAN_LIST_BUSY_BIT, flag); - ret = xlist_del_head(&pool->clean_list); + ret = llist_del_first(&pool->clean_list); if (ret) - ibmr = list_entry(ret, struct rds_ib_mr, xlist); + ibmr = llist_entry(ret, struct rds_ib_mr, llnode); clear_bit(CLEAN_LIST_BUSY_BIT, flag); preempt_enable(); @@ -529,46 +521,44 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr } /* - * given an xlist of mrs, put them all into the list_head for more processing + * given an llist of mrs, put them all into the list_head for more processing */ -static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list) +static void llist_append_to_list(struct llist_head *llist, struct list_head *list) { struct rds_ib_mr *ibmr; - struct xlist_head splice; - struct xlist_head *cur; - struct xlist_head *next; - - splice.next = NULL; - xlist_splice(xlist, &splice); - cur = splice.next; - while (cur) { - next = cur->next; - ibmr = list_entry(cur, struct rds_ib_mr, xlist); + struct llist_node *node; + struct llist_node *next; + + node = llist_del_all(llist); + while (node) { + next = node->next; + ibmr = llist_entry(node, struct rds_ib_mr, llnode); list_add_tail(&ibmr->unmap_list, list); - cur = next; + node = next; } } /* - * this takes a list head of mrs and turns it into an xlist of clusters. - * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for - * reuse. + * this takes a list head of mrs and turns it into linked llist nodes + * of clusters. Each cluster has linked llist nodes of + * MR_CLUSTER_SIZE mrs that are ready for reuse. */ -static void list_append_to_xlist(struct rds_ib_mr_pool *pool, - struct list_head *list, struct xlist_head *xlist, - struct xlist_head **tail_ret) +static void list_to_llist_nodes(struct rds_ib_mr_pool *pool, + struct list_head *list, + struct llist_node **nodes_head, + struct llist_node **nodes_tail) { struct rds_ib_mr *ibmr; - struct xlist_head *cur_mr = xlist; - struct xlist_head *tail_mr = NULL; + struct llist_node *cur = NULL; + struct llist_node **next = nodes_head; list_for_each_entry(ibmr, list, unmap_list) { - tail_mr = &ibmr->xlist; - tail_mr->next = NULL; - cur_mr->next = tail_mr; - cur_mr = tail_mr; + cur = &ibmr->llnode; + *next = cur; + next = &cur->next; } - *tail_ret = tail_mr; + *next = NULL; + *nodes_tail = cur; } /* @@ -581,8 +571,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **ibmr_ret) { struct rds_ib_mr *ibmr, *next; - struct xlist_head clean_xlist; - struct xlist_head *clean_tail; + struct llist_node *clean_nodes; + struct llist_node *clean_tail; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); unsigned long unpinned = 0; @@ -603,7 +593,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, prepare_to_wait(&pool->flush_wait, &wait, TASK_UNINTERRUPTIBLE); - if (xlist_empty(&pool->clean_list)) + if (llist_empty(&pool->clean_list)) schedule(); ibmr = rds_ib_reuse_fmr(pool); @@ -628,10 +618,10 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, /* Get the list of all MRs to be dropped. Ordering matters - * we want to put drop_list ahead of free_list. */ - xlist_append_to_list(&pool->drop_list, &unmap_list); - xlist_append_to_list(&pool->free_list, &unmap_list); + llist_append_to_list(&pool->drop_list, &unmap_list); + llist_append_to_list(&pool->free_list, &unmap_list); if (free_all) - xlist_append_to_list(&pool->clean_list, &unmap_list); + llist_append_to_list(&pool->clean_list, &unmap_list); free_goal = rds_ib_flush_goal(pool, free_all); @@ -663,22 +653,22 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, if (!list_empty(&unmap_list)) { /* we have to make sure that none of the things we're about * to put on the clean list would race with other cpus trying - * to pull items off. The xlist would explode if we managed to + * to pull items off. The llist would explode if we managed to * remove something from the clean list and then add it back again - * while another CPU was spinning on that same item in xlist_del_head. + * while another CPU was spinning on that same item in llist_del_first. * - * This is pretty unlikely, but just in case wait for an xlist grace period + * This is pretty unlikely, but just in case wait for an llist grace period * here before adding anything back into the clean list. */ wait_clean_list_grace(); - list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail); + list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) - refill_local(pool, &clean_xlist, ibmr_ret); + *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); - /* refill_local may have emptied our list */ - if (!xlist_empty(&clean_xlist)) - xlist_add(clean_xlist.next, clean_tail, &pool->clean_list); + /* more than one entry in llist nodes */ + if (clean_nodes->next) + llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list); } @@ -711,9 +701,9 @@ void rds_ib_free_mr(void *trans_private, int invalidate) /* Return it to the pool's free list */ if (ibmr->remap_count >= pool->fmr_attr.max_maps) - xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list); + llist_add(&ibmr->llnode, &pool->drop_list); else - xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list); + llist_add(&ibmr->llnode, &pool->free_list); atomic_add(ibmr->sg_len, &pool->free_pinned); atomic_inc(&pool->dirty_count); diff --git a/net/rds/info.c b/net/rds/info.c index 4fdf1b6e84ff..f1c016c4146e 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -34,6 +34,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/proc_fs.h> +#include <linux/export.h> #include "rds.h" diff --git a/net/rds/iw.c b/net/rds/iw.c index f7474844f096..7826d46baa70 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -38,6 +38,7 @@ #include <linux/if_arp.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/module.h> #include "rds.h" #include "iw.h" diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 4e1de171866c..a817705ce2d0 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -477,17 +477,6 @@ void rds_iw_sync_mr(void *trans_private, int direction) } } -static inline unsigned int rds_iw_flush_goal(struct rds_iw_mr_pool *pool, int free_all) -{ - unsigned int item_count; - - item_count = atomic_read(&pool->item_count); - if (free_all) - return item_count; - - return 0; -} - /* * Flush our pool of MRs. * At a minimum, all currently unused MRs are unmapped. @@ -500,7 +489,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) LIST_HEAD(unmap_list); LIST_HEAD(kill_list); unsigned long flags; - unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal; + unsigned int nfreed = 0, ncleaned = 0, unpinned = 0; int ret = 0; rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); @@ -514,8 +503,6 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) list_splice_init(&pool->clean_list, &kill_list); spin_unlock_irqrestore(&pool->list_lock, flags); - free_goal = rds_iw_flush_goal(pool, free_all); - /* Batched invalidate of dirty MRs. * For FMR based MRs, the mappings on the unmap list are * actually members of an ibmr (ibmr->mapping). They either diff --git a/net/rds/message.c b/net/rds/message.c index 1fd3d29023d7..f0a4658f3273 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -32,6 +32,7 @@ */ #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/export.h> #include "rds.h" diff --git a/net/rds/page.c b/net/rds/page.c index b82d63e77b03..2499cd108421 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -33,6 +33,7 @@ #include <linux/highmem.h> #include <linux/gfp.h> #include <linux/cpu.h> +#include <linux/export.h> #include "rds.h" diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index f8760e1b6688..c2be901d19ee 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -30,6 +30,7 @@ * SOFTWARE. * */ +#include <linux/module.h> #include <rdma/rdma_cm.h> #include "rdma_transport.h" diff --git a/net/rds/rds.h b/net/rds/rds.h index da8adac2bf06..7eaba1831f0d 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -36,8 +36,8 @@ #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) #else /* sigh, pr_debug() causes unused variable warnings */ -static inline void __attribute__ ((format (printf, 1, 2))) -rdsdebug(char *fmt, ...) +static inline __printf(1, 2) +void rdsdebug(char *fmt, ...) { } #endif @@ -625,8 +625,8 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), size_t item_len); -void __rds_conn_error(struct rds_connection *conn, const char *, ...) - __attribute__ ((format (printf, 2, 3))); +__printf(2, 3) +void __rds_conn_error(struct rds_connection *conn, const char *, ...); #define rds_conn_error(conn, fmt...) \ __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) diff --git a/net/rds/recv.c b/net/rds/recv.c index 596689e59272..bc3f8cd6d070 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -34,6 +34,7 @@ #include <linux/slab.h> #include <net/sock.h> #include <linux/in.h> +#include <linux/export.h> #include "rds.h" diff --git a/net/rds/send.c b/net/rds/send.c index aa57e22539ef..e2d63c59e7c2 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -31,11 +31,13 @@ * */ #include <linux/kernel.h> +#include <linux/moduleparam.h> #include <linux/gfp.h> #include <net/sock.h> #include <linux/in.h> #include <linux/list.h> #include <linux/ratelimit.h> +#include <linux/export.h> #include "rds.h" diff --git a/net/rds/stats.c b/net/rds/stats.c index 10c759ccac0c..7be790d60b90 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -33,6 +33,7 @@ #include <linux/percpu.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> +#include <linux/export.h> #include "rds.h" diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 8e0a32001c90..edac9ef2bc8b 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/in.h> +#include <linux/module.h> #include <net/tcp.h> #include "rds.h" diff --git a/net/rds/threads.c b/net/rds/threads.c index 0fd90f8c5f59..65eaefcab241 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -32,6 +32,7 @@ */ #include <linux/kernel.h> #include <linux/random.h> +#include <linux/export.h> #include "rds.h" diff --git a/net/rds/xlist.h b/net/rds/xlist.h deleted file mode 100644 index e6b5190daddd..000000000000 --- a/net/rds/xlist.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _LINUX_XLIST_H -#define _LINUX_XLIST_H - -#include <linux/stddef.h> -#include <linux/poison.h> -#include <linux/prefetch.h> -#include <asm/system.h> - -struct xlist_head { - struct xlist_head *next; -}; - -static inline void INIT_XLIST_HEAD(struct xlist_head *list) -{ - list->next = NULL; -} - -static inline int xlist_empty(struct xlist_head *head) -{ - return head->next == NULL; -} - -static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail, - struct xlist_head *head) -{ - struct xlist_head *cur; - struct xlist_head *check; - - while (1) { - cur = head->next; - tail->next = cur; - check = cmpxchg(&head->next, cur, new); - if (check == cur) - break; - } -} - -static inline struct xlist_head *xlist_del_head(struct xlist_head *head) -{ - struct xlist_head *cur; - struct xlist_head *check; - struct xlist_head *next; - - while (1) { - cur = head->next; - if (!cur) - goto out; - - next = cur->next; - check = cmpxchg(&head->next, cur, next); - if (check == cur) - goto out; - } -out: - return cur; -} - -static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head) -{ - struct xlist_head *cur; - - cur = head->next; - if (!cur) - return NULL; - - head->next = cur->next; - return cur; -} - -static inline void xlist_splice(struct xlist_head *list, - struct xlist_head *head) -{ - struct xlist_head *cur; - - WARN_ON(head->next); - cur = xchg(&list->next, NULL); - head->next = cur; -} - -#endif diff --git a/net/rfkill/core.c b/net/rfkill/core.c index be90640a2774..354760ebbbd2 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -235,7 +235,7 @@ static bool __rfkill_set_hw_state(struct rfkill *rfkill, else rfkill->state &= ~RFKILL_BLOCK_HW; *change = prev != blocked; - any = rfkill->state & RFKILL_BLOCK_ANY; + any = !!(rfkill->state & RFKILL_BLOCK_ANY); spin_unlock_irqrestore(&rfkill->lock, flags); rfkill_led_trigger_event(rfkill); @@ -644,7 +644,7 @@ static ssize_t rfkill_soft_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 0, &state); + err = kstrtoul(buf, 0, &state); if (err) return err; @@ -688,7 +688,7 @@ static ssize_t rfkill_state_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; - err = strict_strtoul(buf, 0, &state); + err = kstrtoul(buf, 0, &state); if (err) return err; diff --git a/net/rfkill/input.c b/net/rfkill/input.c index 1bca6d49ec96..24c55c53e6a2 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -15,6 +15,7 @@ #include <linux/input.h> #include <linux/slab.h> +#include <linux/moduleparam.h> #include <linux/workqueue.h> #include <linux/init.h> #include <linux/rfkill.h> diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 256c5ddd2d72..865adb61685a 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -101,6 +101,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev) if (!rfkill) return -ENOMEM; + if (pdata->gpio_runtime_setup) { + ret = pdata->gpio_runtime_setup(pdev); + if (ret) { + pr_warn("%s: can't set up gpio\n", __func__); + goto fail_alloc; + } + } + rfkill->pdata = pdata; len = strlen(pdata->name); @@ -182,7 +190,10 @@ fail_alloc: static int rfkill_gpio_remove(struct platform_device *pdev) { struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev); + struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; + if (pdata->gpio_runtime_close) + pdata->gpio_runtime_close(pdev); rfkill_unregister(rfkill->rfkill_dev); rfkill_destroy(rfkill->rfkill_dev); if (gpio_is_valid(rfkill->pdata->shutdown_gpio)) @@ -209,18 +220,7 @@ static struct platform_driver rfkill_gpio_driver = { }, }; -static int __init rfkill_gpio_init(void) -{ - return platform_driver_register(&rfkill_gpio_driver); -} - -static void __exit rfkill_gpio_exit(void) -{ - platform_driver_unregister(&rfkill_gpio_driver); -} - -module_init(rfkill_gpio_init); -module_exit(rfkill_gpio_exit); +module_platform_driver(rfkill_gpio_driver); MODULE_DESCRIPTION("gpio rfkill"); MODULE_AUTHOR("NVIDIA"); diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c index 18dc512a10f3..11da3018a853 100644 --- a/net/rfkill/rfkill-regulator.c +++ b/net/rfkill/rfkill-regulator.c @@ -36,12 +36,12 @@ static int rfkill_regulator_set_block(void *data, bool blocked) if (blocked) { if (rfkill_data->reg_enabled) { regulator_disable(rfkill_data->vcc); - rfkill_data->reg_enabled = 0; + rfkill_data->reg_enabled = false; } } else { if (!rfkill_data->reg_enabled) { regulator_enable(rfkill_data->vcc); - rfkill_data->reg_enabled = 1; + rfkill_data->reg_enabled = true; } } @@ -90,14 +90,13 @@ static int __devinit rfkill_regulator_probe(struct platform_device *pdev) pdata->type, &rfkill_regulator_ops, rfkill_data); if (rf_kill == NULL) { - dev_err(&pdev->dev, "Cannot alloc rfkill device\n"); ret = -ENOMEM; goto err_rfkill_alloc; } if (regulator_is_enabled(vcc)) { dev_dbg(&pdev->dev, "Regulator already enabled\n"); - rfkill_data->reg_enabled = 1; + rfkill_data->reg_enabled = true; } rfkill_data->vcc = vcc; rfkill_data->rf_kill = rf_kill; @@ -145,17 +144,7 @@ static struct platform_driver rfkill_regulator_driver = { }, }; -static int __init rfkill_regulator_init(void) -{ - return platform_driver_register(&rfkill_regulator_driver); -} -module_init(rfkill_regulator_init); - -static void __exit rfkill_regulator_exit(void) -{ - platform_driver_unregister(&rfkill_regulator_driver); -} -module_exit(rfkill_regulator_exit); +module_platform_driver(rfkill_regulator_driver); MODULE_AUTHOR("Guiming Zhuo <gmzhuo@gmail.com>"); MODULE_AUTHOR("Antonio Ospite <ospite@studenti.unina.it>"); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index d389de197089..cd9b7ee60f3e 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -36,6 +36,7 @@ #include <linux/init.h> #include <net/rose.h> #include <linux/seq_file.h> +#include <linux/export.h> static unsigned int rose_neigh_no = 1; diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index f99cfce7ca97..c3126e864f3c 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -195,7 +195,7 @@ static void rxrpc_resend(struct rxrpc_call *call) sp = rxrpc_skb(txb); if (sp->need_resend) { - sp->need_resend = 0; + sp->need_resend = false; /* each Tx packet has a new serial number */ sp->hdr.serial = @@ -216,7 +216,7 @@ static void rxrpc_resend(struct rxrpc_call *call) } if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = 1; + sp->need_resend = true; resend |= 1; } else if (resend & 2) { if (time_before(sp->resend_at, resend_at)) @@ -265,7 +265,7 @@ static void rxrpc_resend_timer(struct rxrpc_call *call) if (sp->need_resend) { ; } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = 1; + sp->need_resend = true; resend |= 1; } else if (resend & 2) { if (time_before(sp->resend_at, resend_at)) @@ -314,11 +314,11 @@ static int rxrpc_process_soft_ACKs(struct rxrpc_call *call, switch (sacks[loop]) { case RXRPC_ACK_TYPE_ACK: - sp->need_resend = 0; + sp->need_resend = false; *p_txb |= 1; break; case RXRPC_ACK_TYPE_NACK: - sp->need_resend = 1; + sp->need_resend = true; *p_txb &= ~1; resend = 1; break; @@ -344,13 +344,13 @@ static int rxrpc_process_soft_ACKs(struct rxrpc_call *call, if (*p_txb & 1) { /* packet must have been discarded */ - sp->need_resend = 1; + sp->need_resend = true; *p_txb &= ~1; resend |= 1; } else if (sp->need_resend) { ; } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = 1; + sp->need_resend = true; resend |= 1; } else if (resend & 2) { if (time_before(sp->resend_at, resend_at)) diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 43ea7de2fc8e..4cba13e46ffd 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -306,10 +306,9 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td, td->data_len = len; if (len > 0) { - td->data = kmalloc(len, GFP_KERNEL); + td->data = kmemdup(xdr, len, GFP_KERNEL); if (!td->data) return -ENOMEM; - memcpy(td->data, xdr, len); len = (len + 3) & ~3; toklen -= len; xdr += len >> 2; @@ -401,10 +400,9 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, _debug("ticket len %u", len); if (len > 0) { - *_ticket = kmalloc(len, GFP_KERNEL); + *_ticket = kmemdup(xdr, len, GFP_KERNEL); if (!*_ticket) return -ENOMEM; - memcpy(*_ticket, xdr, len); len = (len + 3) & ~3; toklen -= len; xdr += len >> 2; diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 5f22e263eda7..16ae88762d00 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -13,6 +13,7 @@ #include <linux/gfp.h> #include <linux/skbuff.h> #include <linux/circ_buf.h> +#include <linux/export.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" @@ -485,7 +486,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, _proto("Tx DATA %%%u { #%u }", ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); - sp->need_resend = 0; + sp->need_resend = false; sp->resend_at = jiffies + rxrpc_resend_timeout * HZ; if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { _debug("run timer"); @@ -507,7 +508,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (ret < 0) { _debug("need instant resend %d", ret); - sp->need_resend = 1; + sp->need_resend = true; rxrpc_instant_resend(call); } diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 0c65013e3bfe..4b48687c3890 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -11,6 +11,7 @@ #include <linux/net.h> #include <linux/skbuff.h> +#include <linux/export.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f2fb67e701a3..93fdf131bd75 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/kmod.h> #include <linux/err.h> +#include <linux/module.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/sch_generic.h> diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 6994214db8f8..1d8bd0dbcd1f 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -21,10 +21,13 @@ #include <linux/ipv6.h> #include <linux/if_vlan.h> #include <linux/slab.h> +#include <linux/module.h> #include <net/pkt_cls.h> #include <net/ip.h> #include <net/route.h> +#include <net/flow_keys.h> + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include <net/netfilter/nf_conntrack.h> #endif @@ -65,132 +68,37 @@ static inline u32 addr_fold(void *addr) return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0); } -static u32 flow_get_src(struct sk_buff *skb) +static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { - case htons(ETH_P_IP): - if (pskb_network_may_pull(skb, sizeof(struct iphdr))) - return ntohl(ip_hdr(skb)->saddr); - break; - case htons(ETH_P_IPV6): - if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) - return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]); - break; - } - + if (flow->src) + return ntohl(flow->src); return addr_fold(skb->sk); } -static u32 flow_get_dst(struct sk_buff *skb) +static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { - case htons(ETH_P_IP): - if (pskb_network_may_pull(skb, sizeof(struct iphdr))) - return ntohl(ip_hdr(skb)->daddr); - break; - case htons(ETH_P_IPV6): - if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) - return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]); - break; - } - + if (flow->dst) + return ntohl(flow->dst); return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; } -static u32 flow_get_proto(struct sk_buff *skb) +static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { - case htons(ETH_P_IP): - return pskb_network_may_pull(skb, sizeof(struct iphdr)) ? - ip_hdr(skb)->protocol : 0; - case htons(ETH_P_IPV6): - return pskb_network_may_pull(skb, sizeof(struct ipv6hdr)) ? - ipv6_hdr(skb)->nexthdr : 0; - default: - return 0; - } + return flow->ip_proto; } -static u32 flow_get_proto_src(struct sk_buff *skb) +static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { - case htons(ETH_P_IP): { - struct iphdr *iph; - int poff; - - if (!pskb_network_may_pull(skb, sizeof(*iph))) - break; - iph = ip_hdr(skb); - if (ip_is_fragment(iph)) - break; - poff = proto_ports_offset(iph->protocol); - if (poff >= 0 && - pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) { - iph = ip_hdr(skb); - return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + - poff)); - } - break; - } - case htons(ETH_P_IPV6): { - struct ipv6hdr *iph; - int poff; - - if (!pskb_network_may_pull(skb, sizeof(*iph))) - break; - iph = ipv6_hdr(skb); - poff = proto_ports_offset(iph->nexthdr); - if (poff >= 0 && - pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) { - iph = ipv6_hdr(skb); - return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) + - poff)); - } - break; - } - } + if (flow->ports) + return ntohs(flow->port16[0]); return addr_fold(skb->sk); } -static u32 flow_get_proto_dst(struct sk_buff *skb) +static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { - case htons(ETH_P_IP): { - struct iphdr *iph; - int poff; - - if (!pskb_network_may_pull(skb, sizeof(*iph))) - break; - iph = ip_hdr(skb); - if (ip_is_fragment(iph)) - break; - poff = proto_ports_offset(iph->protocol); - if (poff >= 0 && - pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { - iph = ip_hdr(skb); - return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + - 2 + poff)); - } - break; - } - case htons(ETH_P_IPV6): { - struct ipv6hdr *iph; - int poff; - - if (!pskb_network_may_pull(skb, sizeof(*iph))) - break; - iph = ipv6_hdr(skb); - poff = proto_ports_offset(iph->nexthdr); - if (poff >= 0 && - pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) { - iph = ipv6_hdr(skb); - return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) + - poff + 2)); - } - break; - } - } + if (flow->ports) + return ntohs(flow->port16[1]); return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; } @@ -223,7 +131,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) #define CTTUPLE(skb, member) \ ({ \ enum ip_conntrack_info ctinfo; \ - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ + const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ if (ct == NULL) \ goto fallback; \ ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \ @@ -236,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) }) #endif -static u32 flow_get_nfct_src(struct sk_buff *skb) +static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { switch (skb->protocol) { case htons(ETH_P_IP): @@ -245,10 +153,10 @@ static u32 flow_get_nfct_src(struct sk_buff *skb) return ntohl(CTTUPLE(skb, src.u3.ip6[3])); } fallback: - return flow_get_src(skb); + return flow_get_src(skb, flow); } -static u32 flow_get_nfct_dst(struct sk_buff *skb) +static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { switch (skb->protocol) { case htons(ETH_P_IP): @@ -257,21 +165,21 @@ static u32 flow_get_nfct_dst(struct sk_buff *skb) return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); } fallback: - return flow_get_dst(skb); + return flow_get_dst(skb, flow); } -static u32 flow_get_nfct_proto_src(struct sk_buff *skb) +static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: - return flow_get_proto_src(skb); + return flow_get_proto_src(skb, flow); } -static u32 flow_get_nfct_proto_dst(struct sk_buff *skb) +static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: - return flow_get_proto_dst(skb); + return flow_get_proto_dst(skb, flow); } static u32 flow_get_rtclassid(const struct sk_buff *skb) @@ -311,19 +219,19 @@ static u32 flow_get_rxhash(struct sk_buff *skb) return skb_get_rxhash(skb); } -static u32 flow_key_get(struct sk_buff *skb, int key) +static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) { switch (key) { case FLOW_KEY_SRC: - return flow_get_src(skb); + return flow_get_src(skb, flow); case FLOW_KEY_DST: - return flow_get_dst(skb); + return flow_get_dst(skb, flow); case FLOW_KEY_PROTO: - return flow_get_proto(skb); + return flow_get_proto(skb, flow); case FLOW_KEY_PROTO_SRC: - return flow_get_proto_src(skb); + return flow_get_proto_src(skb, flow); case FLOW_KEY_PROTO_DST: - return flow_get_proto_dst(skb); + return flow_get_proto_dst(skb, flow); case FLOW_KEY_IIF: return flow_get_iif(skb); case FLOW_KEY_PRIORITY: @@ -333,13 +241,13 @@ static u32 flow_key_get(struct sk_buff *skb, int key) case FLOW_KEY_NFCT: return flow_get_nfct(skb); case FLOW_KEY_NFCT_SRC: - return flow_get_nfct_src(skb); + return flow_get_nfct_src(skb, flow); case FLOW_KEY_NFCT_DST: - return flow_get_nfct_dst(skb); + return flow_get_nfct_dst(skb, flow); case FLOW_KEY_NFCT_PROTO_SRC: - return flow_get_nfct_proto_src(skb); + return flow_get_nfct_proto_src(skb, flow); case FLOW_KEY_NFCT_PROTO_DST: - return flow_get_nfct_proto_dst(skb); + return flow_get_nfct_proto_dst(skb, flow); case FLOW_KEY_RTCLASSID: return flow_get_rtclassid(skb); case FLOW_KEY_SKUID: @@ -356,6 +264,16 @@ static u32 flow_key_get(struct sk_buff *skb, int key) } } +#define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | \ + (1 << FLOW_KEY_DST) | \ + (1 << FLOW_KEY_PROTO) | \ + (1 << FLOW_KEY_PROTO_SRC) | \ + (1 << FLOW_KEY_PROTO_DST) | \ + (1 << FLOW_KEY_NFCT_SRC) | \ + (1 << FLOW_KEY_NFCT_DST) | \ + (1 << FLOW_KEY_NFCT_PROTO_SRC) | \ + (1 << FLOW_KEY_NFCT_PROTO_DST)) + static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { @@ -367,17 +285,20 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, int r; list_for_each_entry(f, &head->filters, list) { - u32 keys[f->nkeys]; + u32 keys[FLOW_KEY_MAX + 1]; + struct flow_keys flow_keys; if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; keymask = f->keymask; + if (keymask & FLOW_KEYS_NEEDED) + skb_flow_dissect(skb, &flow_keys); for (n = 0; n < f->nkeys; n++) { key = ffs(keymask) - 1; keymask &= ~(1 << key); - keys[n] = flow_key_get(skb, key); + keys[n] = flow_key_get(skb, key, &flow_keys); } if (f->mode == FLOW_MODE_HASH) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index dca6c1a576f7..3d8981fde301 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -618,20 +618,24 @@ void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, } EXPORT_SYMBOL(qdisc_class_hash_remove); -/* Allocate an unique handle from space managed by kernel */ - +/* Allocate an unique handle from space managed by kernel + * Possible range is [8000-FFFF]:0000 (0x8000 values) + */ static u32 qdisc_alloc_handle(struct net_device *dev) { - int i = 0x10000; + int i = 0x8000; static u32 autohandle = TC_H_MAKE(0x80000000U, 0); do { autohandle += TC_H_MAKE(0x10000U, 0); if (autohandle == TC_H_MAKE(TC_H_ROOT, 0)) autohandle = TC_H_MAKE(0x80000000U, 0); - } while (qdisc_lookup(dev, autohandle) && --i > 0); + if (!qdisc_lookup(dev, autohandle)) + return autohandle; + cond_resched(); + } while (--i > 0); - return i > 0 ? autohandle : 0; + return 0; } void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 3422b25df9e4..e465064d39a3 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -19,10 +19,7 @@ #include <net/pkt_sched.h> #include <net/inet_ecn.h> #include <net/red.h> -#include <linux/ip.h> -#include <net/ip.h> -#include <linux/ipv6.h> -#include <net/ipv6.h> +#include <net/flow_keys.h> /* CHOKe stateless AQM for fair bandwidth allocation @@ -60,6 +57,7 @@ struct choke_sched_data { struct red_parms parms; /* Variables */ + struct red_vars vars; struct tcf_proto *filter_list; struct { u32 prob_drop; /* Early probability drops */ @@ -142,85 +140,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) --sch->q.qlen; } -/* - * Compare flow of two packets - * Returns true only if source and destination address and port match. - * false for special cases - */ -static bool choke_match_flow(struct sk_buff *skb1, - struct sk_buff *skb2) -{ - int off1, off2, poff; - const u32 *ports1, *ports2; - u8 ip_proto; - __u32 hash1; - - if (skb1->protocol != skb2->protocol) - return false; - - /* Use hash value as quick check - * Assumes that __skb_get_rxhash makes IP header and ports linear - */ - hash1 = skb_get_rxhash(skb1); - if (!hash1 || hash1 != skb_get_rxhash(skb2)) - return false; - - /* Probably match, but be sure to avoid hash collisions */ - off1 = skb_network_offset(skb1); - off2 = skb_network_offset(skb2); - - switch (skb1->protocol) { - case __constant_htons(ETH_P_IP): { - const struct iphdr *ip1, *ip2; - - ip1 = (const struct iphdr *) (skb1->data + off1); - ip2 = (const struct iphdr *) (skb2->data + off2); - - ip_proto = ip1->protocol; - if (ip_proto != ip2->protocol || - ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr) - return false; - - if (ip_is_fragment(ip1) | ip_is_fragment(ip2)) - ip_proto = 0; - off1 += ip1->ihl * 4; - off2 += ip2->ihl * 4; - break; - } - - case __constant_htons(ETH_P_IPV6): { - const struct ipv6hdr *ip1, *ip2; - - ip1 = (const struct ipv6hdr *) (skb1->data + off1); - ip2 = (const struct ipv6hdr *) (skb2->data + off2); - - ip_proto = ip1->nexthdr; - if (ip_proto != ip2->nexthdr || - ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) || - ipv6_addr_cmp(&ip1->daddr, &ip2->daddr)) - return false; - off1 += 40; - off2 += 40; - } - - default: /* Maybe compare MAC header here? */ - return false; - } - - poff = proto_ports_offset(ip_proto); - if (poff < 0) - return true; - - off1 += poff; - off2 += poff; - - ports1 = (__force u32 *)(skb1->data + off1); - ports2 = (__force u32 *)(skb2->data + off2); - return *ports1 == *ports2; -} - struct choke_skb_cb { - u16 classid; + u16 classid; + u8 keys_valid; + struct flow_keys keys; }; static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) @@ -241,6 +164,32 @@ static u16 choke_get_classid(const struct sk_buff *skb) } /* + * Compare flow of two packets + * Returns true only if source and destination address and port match. + * false for special cases + */ +static bool choke_match_flow(struct sk_buff *skb1, + struct sk_buff *skb2) +{ + if (skb1->protocol != skb2->protocol) + return false; + + if (!choke_skb_cb(skb1)->keys_valid) { + choke_skb_cb(skb1)->keys_valid = 1; + skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys); + } + + if (!choke_skb_cb(skb2)->keys_valid) { + choke_skb_cb(skb2)->keys_valid = 1; + skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys); + } + + return !memcmp(&choke_skb_cb(skb1)->keys, + &choke_skb_cb(skb2)->keys, + sizeof(struct flow_keys)); +} + +/* * Classify flow using either: * 1. pre-existing classification result in skb * 2. fast internal classification @@ -317,7 +266,7 @@ static bool choke_match_random(const struct choke_sched_data *q, static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); - struct red_parms *p = &q->parms; + const struct red_parms *p = &q->parms; int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (q->filter_list) { @@ -326,14 +275,15 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) goto other_drop; /* Packet was eaten by filter */ } + choke_skb_cb(skb)->keys_valid = 0; /* Compute average queue usage (see RED) */ - p->qavg = red_calc_qavg(p, sch->q.qlen); - if (red_is_idling(p)) - red_end_of_idle_period(p); + q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen); + if (red_is_idling(&q->vars)) + red_end_of_idle_period(&q->vars); /* Is queue small? */ - if (p->qavg <= p->qth_min) - p->qcount = -1; + if (q->vars.qavg <= p->qth_min) + q->vars.qcount = -1; else { unsigned int idx; @@ -345,8 +295,8 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) } /* Queue is large, always mark/drop */ - if (p->qavg > p->qth_max) { - p->qcount = -1; + if (q->vars.qavg > p->qth_max) { + q->vars.qcount = -1; sch->qstats.overlimits++; if (use_harddrop(q) || !use_ecn(q) || @@ -356,10 +306,10 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) } q->stats.forced_mark++; - } else if (++p->qcount) { - if (red_mark_probability(p, p->qavg)) { - p->qcount = 0; - p->qR = red_random(p); + } else if (++q->vars.qcount) { + if (red_mark_probability(p, &q->vars, q->vars.qavg)) { + q->vars.qcount = 0; + q->vars.qR = red_random(p); sch->qstats.overlimits++; if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { @@ -370,7 +320,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->stats.prob_mark++; } } else - p->qR = red_random(p); + q->vars.qR = red_random(p); } /* Admit new packet */ @@ -404,8 +354,8 @@ static struct sk_buff *choke_dequeue(struct Qdisc *sch) struct sk_buff *skb; if (q->head == q->tail) { - if (!red_is_idling(&q->parms)) - red_start_of_idle_period(&q->parms); + if (!red_is_idling(&q->vars)) + red_start_of_idle_period(&q->vars); return NULL; } @@ -428,8 +378,8 @@ static unsigned int choke_drop(struct Qdisc *sch) if (len > 0) q->stats.other++; else { - if (!red_is_idling(&q->parms)) - red_start_of_idle_period(&q->parms); + if (!red_is_idling(&q->vars)) + red_start_of_idle_period(&q->vars); } return len; @@ -439,12 +389,13 @@ static void choke_reset(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); - red_restart(&q->parms); + red_restart(&q->vars); } static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = { [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE }, + [TCA_CHOKE_MAX_P] = { .type = NLA_U32 }, }; @@ -466,6 +417,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) int err; struct sk_buff **old = NULL; unsigned int mask; + u32 max_P; if (opt == NULL) return -EINVAL; @@ -478,6 +430,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) tb[TCA_CHOKE_STAB] == NULL) return -EINVAL; + max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0; + ctl = nla_data(tb[TCA_CHOKE_PARMS]); if (ctl->limit > CHOKE_MAX_QUEUE) @@ -527,10 +481,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, ctl->Scell_log, - nla_data(tb[TCA_CHOKE_STAB])); + nla_data(tb[TCA_CHOKE_STAB]), + max_P); + red_set_vars(&q->vars); if (q->head == q->tail) - red_end_of_idle_period(&q->parms); + red_end_of_idle_period(&q->vars); sch_tree_unlock(sch); choke_free(old); @@ -561,6 +517,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt); + NLA_PUT_U32(skb, TCA_CHOKE_MAX_P, q->parms.max_P); return nla_nest_end(skb, opts); nla_put_failure: diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 69fca2798804..67fc573e013a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -60,7 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_frozen_or_stopped(txq)) { + if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; } else @@ -121,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, spin_unlock(root_lock); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_tx_queue_frozen_or_stopped(txq)) + if (!netif_xmit_frozen_or_stopped(txq)) ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); @@ -143,7 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, ret = dev_requeue_skb(skb, q); } - if (ret && netif_tx_queue_frozen_or_stopped(txq)) + if (ret && netif_xmit_frozen_or_stopped(txq)) ret = 0; return ret; @@ -242,10 +242,11 @@ static void dev_watchdog(unsigned long arg) * old device drivers set dev->trans_start */ trans_start = txq->trans_start ? : dev->trans_start; - if (netif_tx_queue_stopped(txq) && + if (netif_xmit_stopped(txq) && time_after(jiffies, (trans_start + dev->watchdog_timeo))) { some_queue_timedout = 1; + txq->trans_timeout++; break; } } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index b9493a09a870..0b15236be7b6 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -34,13 +34,14 @@ struct gred_sched; struct gred_sched_data { u32 limit; /* HARD maximal queue length */ - u32 DP; /* the drop pramaters */ + u32 DP; /* the drop parameters */ u32 bytesin; /* bytes seen on virtualQ so far*/ u32 packetsin; /* packets seen on virtualQ so far*/ u32 backlog; /* bytes on the virtualQ */ u8 prio; /* the prio of this vq */ struct red_parms parms; + struct red_vars vars; struct red_stats stats; }; @@ -55,7 +56,7 @@ struct gred_sched { u32 red_flags; u32 DPs; u32 def; - struct red_parms wred_set; + struct red_vars wred_set; }; static inline int gred_wred_mode(struct gred_sched *table) @@ -125,17 +126,17 @@ static inline u16 tc_index_to_dp(struct sk_buff *skb) return skb->tc_index & GRED_VQ_MASK; } -static inline void gred_load_wred_set(struct gred_sched *table, +static inline void gred_load_wred_set(const struct gred_sched *table, struct gred_sched_data *q) { - q->parms.qavg = table->wred_set.qavg; - q->parms.qidlestart = table->wred_set.qidlestart; + q->vars.qavg = table->wred_set.qavg; + q->vars.qidlestart = table->wred_set.qidlestart; } static inline void gred_store_wred_set(struct gred_sched *table, struct gred_sched_data *q) { - table->wred_set.qavg = q->parms.qavg; + table->wred_set.qavg = q->vars.qavg; } static inline int gred_use_ecn(struct gred_sched *t) @@ -170,7 +171,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) goto drop; } - /* fix tc_index? --could be controvesial but needed for + /* fix tc_index? --could be controversial but needed for requeueing */ skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; } @@ -181,8 +182,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) for (i = 0; i < t->DPs; i++) { if (t->tab[i] && t->tab[i]->prio < q->prio && - !red_is_idling(&t->tab[i]->parms)) - qavg += t->tab[i]->parms.qavg; + !red_is_idling(&t->tab[i]->vars)) + qavg += t->tab[i]->vars.qavg; } } @@ -193,15 +194,17 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (gred_wred_mode(t)) gred_load_wred_set(t, q); - q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch)); + q->vars.qavg = red_calc_qavg(&q->parms, + &q->vars, + gred_backlog(t, q, sch)); - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); + if (red_is_idling(&q->vars)) + red_end_of_idle_period(&q->vars); if (gred_wred_mode(t)) gred_store_wred_set(t, q); - switch (red_action(&q->parms, q->parms.qavg + qavg)) { + switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) { case RED_DONT_MARK: break; @@ -260,7 +263,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch) q->backlog -= qdisc_pkt_len(skb); if (!q->backlog && !gred_wred_mode(t)) - red_start_of_idle_period(&q->parms); + red_start_of_idle_period(&q->vars); } return skb; @@ -293,7 +296,7 @@ static unsigned int gred_drop(struct Qdisc *sch) q->stats.other++; if (!q->backlog && !gred_wred_mode(t)) - red_start_of_idle_period(&q->parms); + red_start_of_idle_period(&q->vars); } qdisc_drop(skb, sch); @@ -320,7 +323,7 @@ static void gred_reset(struct Qdisc *sch) if (!q) continue; - red_restart(&q->parms); + red_restart(&q->vars); q->backlog = 0; } } @@ -379,29 +382,31 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) } static inline int gred_change_vq(struct Qdisc *sch, int dp, - struct tc_gred_qopt *ctl, int prio, u8 *stab) + struct tc_gred_qopt *ctl, int prio, + u8 *stab, u32 max_P, + struct gred_sched_data **prealloc) { struct gred_sched *table = qdisc_priv(sch); - struct gred_sched_data *q; + struct gred_sched_data *q = table->tab[dp]; - if (table->tab[dp] == NULL) { - table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL); - if (table->tab[dp] == NULL) + if (!q) { + table->tab[dp] = q = *prealloc; + *prealloc = NULL; + if (!q) return -ENOMEM; } - q = table->tab[dp]; q->DP = dp; q->prio = prio; q->limit = ctl->limit; if (q->backlog == 0) - red_end_of_idle_period(&q->parms); + red_end_of_idle_period(&q->vars); red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, - ctl->Scell_log, stab); - + ctl->Scell_log, stab, max_P); + red_set_vars(&q->vars); return 0; } @@ -409,6 +414,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) }, [TCA_GRED_STAB] = { .len = 256 }, [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, + [TCA_GRED_MAX_P] = { .type = NLA_U32 }, }; static int gred_change(struct Qdisc *sch, struct nlattr *opt) @@ -418,6 +424,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) struct nlattr *tb[TCA_GRED_MAX + 1]; int err, prio = GRED_DEF_PRIO; u8 *stab; + u32 max_P; + struct gred_sched_data *prealloc; if (opt == NULL) return -EINVAL; @@ -433,6 +441,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) tb[TCA_GRED_STAB] == NULL) return -EINVAL; + max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0; + err = -EINVAL; ctl = nla_data(tb[TCA_GRED_PARMS]); stab = nla_data(tb[TCA_GRED_STAB]); @@ -455,9 +465,10 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) prio = ctl->prio; } + prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); sch_tree_lock(sch); - err = gred_change_vq(sch, ctl->DP, ctl, prio, stab); + err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc); if (err < 0) goto errout_locked; @@ -471,6 +482,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) errout_locked: sch_tree_unlock(sch); + kfree(prealloc); errout: return err; } @@ -498,6 +510,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) struct gred_sched *table = qdisc_priv(sch); struct nlattr *parms, *opts = NULL; int i; + u32 max_p[MAX_DPs]; struct tc_gred_sopt sopt = { .DPs = table->DPs, .def_DP = table->def, @@ -509,6 +522,14 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt); + + for (i = 0; i < MAX_DPs; i++) { + struct gred_sched_data *q = table->tab[i]; + + max_p[i] = q ? q->parms.max_P : 0; + } + NLA_PUT(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p); + parms = nla_nest_start(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; @@ -545,12 +566,12 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) opt.bytesin = q->bytesin; if (gred_wred_mode(table)) { - q->parms.qidlestart = - table->tab[table->def]->parms.qidlestart; - q->parms.qavg = table->tab[table->def]->parms.qavg; + q->vars.qidlestart = + table->tab[table->def]->vars.qidlestart; + q->vars.qavg = table->tab[table->def]->vars.qavg; } - opt.qave = red_calc_qavg(&q->parms, q->parms.qavg); + opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg); append_opt: if (nla_append(skb, sizeof(opt), &opt) < 0) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6488e6425652..9bdca2e011e9 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1368,6 +1368,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct tc_hfsc_stats xstats; cl->qstats.qlen = cl->qdisc->q.qlen; + cl->qstats.backlog = cl->qdisc->qstats.backlog; xstats.level = cl->level; xstats.period = cl->cl_vtperiod; xstats.work = cl->cl_total; @@ -1561,6 +1562,15 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) struct hfsc_sched *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; + struct hfsc_class *cl; + struct hlist_node *n; + unsigned int i; + + sch->qstats.backlog = 0; + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + sch->qstats.backlog += cl->qdisc->qstats.backlog; + } qopt.defcls = q->defcls; NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index ec5cbc848963..0a4b2f9a0094 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/kernel.h> +#include <linux/export.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ea17cbed29ef..28de43092330 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -14,6 +14,7 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> +#include <linux/module.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/sch_generic.h> @@ -106,7 +107,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; - if (nla_len(opt) < sizeof(*qopt)) + if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index edc1950e0e77..49131d7a7446 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -107,7 +107,8 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) /* Check that target subqueue is available before * pulling an skb to avoid head-of-line blocking. */ - if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { + if (!netif_xmit_stopped( + netdev_get_tx_queue(qdisc_dev(sch), q->curband))) { qdisc = q->queues[q->curband]; skb = qdisc->dequeue(qdisc); if (skb) { @@ -138,7 +139,8 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch) /* Check that target subqueue is available before * pulling an skb to avoid head-of-line blocking. */ - if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) { + if (!netif_xmit_stopped( + netdev_get_tx_queue(qdisc_dev(sch), curband))) { qdisc = q->queues[curband]; skb = qdisc->ops->peek(qdisc); if (skb) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index eb3b9a86c6ed..e7e1d0b57b3d 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -22,6 +22,7 @@ #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> +#include <linux/reciprocal_div.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -66,7 +67,11 @@ */ struct netem_sched_data { + /* internal t(ime)fifo qdisc uses sch->q and sch->limit */ + + /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; + struct qdisc_watchdog watchdog; psched_tdiff_t latency; @@ -79,6 +84,11 @@ struct netem_sched_data { u32 duplicate; u32 reorder; u32 corrupt; + u32 rate; + s32 packet_overhead; + u32 cell_size; + u32 cell_size_reciprocal; + s32 cell_overhead; struct crndstate { u32 last; @@ -111,7 +121,9 @@ struct netem_sched_data { }; -/* Time stamp put into socket buffer control block */ +/* Time stamp put into socket buffer control block + * Only valid when skbs are in our internal t(ime)fifo queue. + */ struct netem_skb_cb { psched_time_t time_to_send; }; @@ -298,6 +310,51 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; } +static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q) +{ + u64 ticks; + + len += q->packet_overhead; + + if (q->cell_size) { + u32 cells = reciprocal_divide(len, q->cell_size_reciprocal); + + if (len > cells * q->cell_size) /* extra cell needed for remainder */ + cells++; + len = cells * (q->cell_size + q->cell_overhead); + } + + ticks = (u64)len * NSEC_PER_SEC; + + do_div(ticks, q->rate); + return PSCHED_NS2TICKS(ticks); +} + +static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) +{ + struct sk_buff_head *list = &sch->q; + psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; + struct sk_buff *skb; + + if (likely(skb_queue_len(list) < sch->limit)) { + skb = skb_peek_tail(list); + /* Optimize for add at tail */ + if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) + return qdisc_enqueue_tail(nskb, sch); + + skb_queue_reverse_walk(list, skb) { + if (tnext >= netem_skb_cb(skb)->time_to_send) + break; + } + + __skb_queue_after(list, skb, nskb); + sch->qstats.backlog += qdisc_pkt_len(nskb); + return NET_XMIT_SUCCESS; + } + + return qdisc_reshape_fail(nskb, sch); +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -371,9 +428,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) &q->delay_cor, q->delay_dist); now = psched_get_time(); + + if (q->rate) { + struct sk_buff_head *list = &sch->q; + + delay += packet_len_2_sched_time(skb->len, q); + + if (!skb_queue_empty(list)) { + /* + * Last packet in queue is reference point (now). + * First packet in queue is already in flight, + * calculate this time bonus and substract + * from delay. + */ + delay -= now - netem_skb_cb(skb_peek(list))->time_to_send; + now = netem_skb_cb(skb_peek_tail(list))->time_to_send; + } + } + cb->time_to_send = now + delay; ++q->counter; - ret = qdisc_enqueue(skb, q->qdisc); + ret = tfifo_enqueue(skb, sch); } else { /* * Do re-ordering by putting one out of N packets at the front @@ -382,9 +457,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) cb->time_to_send = psched_get_time(); q->counter = 0; - __skb_queue_head(&q->qdisc->q, skb); - q->qdisc->qstats.backlog += qdisc_pkt_len(skb); - q->qdisc->qstats.requeues++; + __skb_queue_head(&sch->q, skb); + sch->qstats.backlog += qdisc_pkt_len(skb); + sch->qstats.requeues++; ret = NET_XMIT_SUCCESS; } @@ -395,19 +470,20 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } - sch->q.qlen++; return NET_XMIT_SUCCESS; } static unsigned int netem_drop(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - unsigned int len = 0; + unsigned int len; - if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { - sch->q.qlen--; + len = qdisc_queue_drop(sch); + if (!len && q->qdisc && q->qdisc->ops->drop) + len = q->qdisc->ops->drop(q->qdisc); + if (len) sch->qstats.drops++; - } + return len; } @@ -419,16 +495,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (qdisc_is_throttled(sch)) return NULL; - skb = q->qdisc->ops->peek(q->qdisc); +tfifo_dequeue: + skb = qdisc_peek_head(sch); if (skb) { const struct netem_skb_cb *cb = netem_skb_cb(skb); - psched_time_t now = psched_get_time(); /* if more time remaining? */ - if (cb->time_to_send <= now) { - skb = qdisc_dequeue_peeked(q->qdisc); + if (cb->time_to_send <= psched_get_time()) { + skb = qdisc_dequeue_tail(sch); if (unlikely(!skb)) - return NULL; + goto qdisc_dequeue; #ifdef CONFIG_NET_CLS_ACT /* @@ -439,15 +515,37 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) skb->tstamp.tv64 = 0; #endif - sch->q.qlen--; + if (q->qdisc) { + int err = qdisc_enqueue(skb, q->qdisc); + + if (unlikely(err != NET_XMIT_SUCCESS)) { + if (net_xmit_drop_count(err)) { + sch->qstats.drops++; + qdisc_tree_decrease_qlen(sch, 1); + } + } + goto tfifo_dequeue; + } +deliver: qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); return skb; } + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); + if (skb) + goto deliver; + } qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); } +qdisc_dequeue: + if (q->qdisc) { + skb = q->qdisc->ops->dequeue(q->qdisc); + if (skb) + goto deliver; + } return NULL; } @@ -455,8 +553,9 @@ static void netem_reset(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - qdisc_reset(q->qdisc); - sch->q.qlen = 0; + qdisc_reset_queue(sch); + if (q->qdisc) + qdisc_reset(q->qdisc); qdisc_watchdog_cancel(&q->watchdog); } @@ -488,7 +587,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) return -EINVAL; s = sizeof(struct disttable) + n * sizeof(s16); - d = kmalloc(s, GFP_KERNEL); + d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN); if (!d) d = vmalloc(s); if (!d) @@ -501,9 +600,10 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); - dist_free(q->delay_dist); - q->delay_dist = d; + swap(q->delay_dist, d); spin_unlock_bh(root_lock); + + dist_free(d); return 0; } @@ -535,6 +635,19 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) init_crandom(&q->corrupt_cor, r->correlation); } +static void get_rate(struct Qdisc *sch, const struct nlattr *attr) +{ + struct netem_sched_data *q = qdisc_priv(sch); + const struct tc_netem_rate *r = nla_data(attr); + + q->rate = r->rate; + q->packet_overhead = r->packet_overhead; + q->cell_size = r->cell_size; + if (q->cell_size) + q->cell_size_reciprocal = reciprocal_value(q->cell_size); + q->cell_overhead = r->cell_overhead; +} + static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); @@ -548,7 +661,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) case NETEM_LOSS_GI: { const struct tc_netem_gimodel *gi = nla_data(la); - if (nla_len(la) != sizeof(struct tc_netem_gimodel)) { + if (nla_len(la) < sizeof(struct tc_netem_gimodel)) { pr_info("netem: incorrect gi model size\n"); return -EINVAL; } @@ -567,8 +680,8 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) case NETEM_LOSS_GE: { const struct tc_netem_gemodel *ge = nla_data(la); - if (nla_len(la) != sizeof(struct tc_netem_gemodel)) { - pr_info("netem: incorrect gi model size\n"); + if (nla_len(la) < sizeof(struct tc_netem_gemodel)) { + pr_info("netem: incorrect ge model size\n"); return -EINVAL; } @@ -594,6 +707,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, + [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, }; @@ -631,11 +745,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (ret < 0) return ret; - ret = fifo_set_limit(q->qdisc, qopt->limit); - if (ret) { - pr_info("netem: can't set fifo limit\n"); - return ret; - } + sch->limit = qopt->limit; q->latency = qopt->latency; q->jitter = qopt->jitter; @@ -666,6 +776,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_NETEM_CORRUPT]) get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); + if (tb[TCA_NETEM_RATE]) + get_rate(sch, tb[TCA_NETEM_RATE]); + q->loss_model = CLG_RANDOM; if (tb[TCA_NETEM_LOSS]) ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]); @@ -673,88 +786,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) return ret; } -/* - * Special case version of FIFO queue for use by netem. - * It queues in order based on timestamps in skb's - */ -struct fifo_sched_data { - u32 limit; - psched_time_t oldest; -}; - -static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) -{ - struct fifo_sched_data *q = qdisc_priv(sch); - struct sk_buff_head *list = &sch->q; - psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; - struct sk_buff *skb; - - if (likely(skb_queue_len(list) < q->limit)) { - /* Optimize for add at tail */ - if (likely(skb_queue_empty(list) || tnext >= q->oldest)) { - q->oldest = tnext; - return qdisc_enqueue_tail(nskb, sch); - } - - skb_queue_reverse_walk(list, skb) { - const struct netem_skb_cb *cb = netem_skb_cb(skb); - - if (tnext >= cb->time_to_send) - break; - } - - __skb_queue_after(list, skb, nskb); - - sch->qstats.backlog += qdisc_pkt_len(nskb); - - return NET_XMIT_SUCCESS; - } - - return qdisc_reshape_fail(nskb, sch); -} - -static int tfifo_init(struct Qdisc *sch, struct nlattr *opt) -{ - struct fifo_sched_data *q = qdisc_priv(sch); - - if (opt) { - struct tc_fifo_qopt *ctl = nla_data(opt); - if (nla_len(opt) < sizeof(*ctl)) - return -EINVAL; - - q->limit = ctl->limit; - } else - q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); - - q->oldest = PSCHED_PASTPERFECT; - return 0; -} - -static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct fifo_sched_data *q = qdisc_priv(sch); - struct tc_fifo_qopt opt = { .limit = q->limit }; - - NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); - return skb->len; - -nla_put_failure: - return -1; -} - -static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = { - .id = "tfifo", - .priv_size = sizeof(struct fifo_sched_data), - .enqueue = tfifo_enqueue, - .dequeue = qdisc_dequeue_head, - .peek = qdisc_peek_head, - .drop = qdisc_queue_drop, - .init = tfifo_init, - .reset = qdisc_reset_queue, - .change = tfifo_init, - .dump = tfifo_dump, -}; - static int netem_init(struct Qdisc *sch, struct nlattr *opt) { struct netem_sched_data *q = qdisc_priv(sch); @@ -766,18 +797,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); q->loss_model = CLG_RANDOM; - q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops, - TC_H_MAKE(sch->handle, 1)); - if (!q->qdisc) { - pr_notice("netem: qdisc create tfifo qdisc failed\n"); - return -ENOMEM; - } - ret = netem_change(sch, opt); - if (ret) { + if (ret) pr_info("netem: change failed\n"); - qdisc_destroy(q->qdisc); - } return ret; } @@ -786,7 +808,8 @@ static void netem_destroy(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); - qdisc_destroy(q->qdisc); + if (q->qdisc) + qdisc_destroy(q->qdisc); dist_free(q->delay_dist); } @@ -846,6 +869,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_corr cor; struct tc_netem_reorder reorder; struct tc_netem_corrupt corrupt; + struct tc_netem_rate rate; qopt.latency = q->latency; qopt.jitter = q->jitter; @@ -868,6 +892,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) corrupt.correlation = q->corrupt_cor.rho; NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); + rate.rate = q->rate; + rate.packet_overhead = q->packet_overhead; + rate.cell_size = q->cell_size; + rate.cell_overhead = q->cell_overhead; + NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate); + if (dump_loss_model(q, skb) != 0) goto nla_put_failure; @@ -883,7 +913,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl, { struct netem_sched_data *q = qdisc_priv(sch); - if (cl != 1) /* only one class */ + if (cl != 1 || !q->qdisc) /* only one class */ return -ENOENT; tcm->tcm_handle |= TC_H_MIN(1); @@ -897,14 +927,13 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, { struct netem_sched_data *q = qdisc_priv(sch); - if (new == NULL) - new = &noop_qdisc; - sch_tree_lock(sch); *old = q->qdisc; q->qdisc = new; - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); + if (*old) { + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + } sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 103343408593..e68cb440756a 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -211,6 +211,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr *tb[TCA_QFQ_MAX + 1]; u32 weight, lmax, inv_w; int i, err; + int delta_w; if (tca[TCA_OPTIONS] == NULL) { pr_notice("qfq: no options\n"); @@ -232,9 +233,10 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; - if (q->wsum + weight > QFQ_MAX_WSUM) { + delta_w = weight - (cl ? ONE_FP / cl->inv_w : 0); + if (q->wsum + delta_w > QFQ_MAX_WSUM) { pr_notice("qfq: total weight out of range (%u + %u)\n", - weight, q->wsum); + delta_w, q->wsum); return -EINVAL; } @@ -256,13 +258,12 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } - sch_tree_lock(sch); - if (tb[TCA_QFQ_WEIGHT]) { - q->wsum = weight - ONE_FP / cl->inv_w; + if (inv_w != cl->inv_w) { + sch_tree_lock(sch); + q->wsum += delta_w; cl->inv_w = inv_w; + sch_tree_unlock(sch); } - sch_tree_unlock(sch); - return 0; } @@ -277,7 +278,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, i = qfq_calc_index(cl->inv_w, cl->lmax); cl->grp = &q->groups[i]; - q->wsum += weight; cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); @@ -294,6 +294,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } } + q->wsum += weight; sch_tree_lock(sch); qdisc_class_hash_insert(&q->clhash, &cl->common); @@ -817,11 +818,11 @@ skip_unblock: static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) { unsigned long mask; - uint32_t limit, roundedF; + u64 limit, roundedF; int slot_shift = cl->grp->slot_shift; roundedF = qfq_round_down(cl->F, slot_shift); - limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift); + limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift); if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) { /* timestamp was stale */ diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 6649463da1b6..a5cc3012cf42 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -39,7 +39,9 @@ struct red_sched_data { u32 limit; /* HARD maximal queue length */ unsigned char flags; + struct timer_list adapt_timer; struct red_parms parms; + struct red_vars vars; struct red_stats stats; struct Qdisc *qdisc; }; @@ -60,12 +62,14 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct Qdisc *child = q->qdisc; int ret; - q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog); + q->vars.qavg = red_calc_qavg(&q->parms, + &q->vars, + child->qstats.backlog); - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); + if (red_is_idling(&q->vars)) + red_end_of_idle_period(&q->vars); - switch (red_action(&q->parms, q->parms.qavg)) { + switch (red_action(&q->parms, &q->vars, q->vars.qavg)) { case RED_DONT_MARK: break; @@ -116,8 +120,8 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) qdisc_bstats_update(sch, skb); sch->q.qlen--; } else { - if (!red_is_idling(&q->parms)) - red_start_of_idle_period(&q->parms); + if (!red_is_idling(&q->vars)) + red_start_of_idle_period(&q->vars); } return skb; } @@ -143,8 +147,8 @@ static unsigned int red_drop(struct Qdisc *sch) return len; } - if (!red_is_idling(&q->parms)) - red_start_of_idle_period(&q->parms); + if (!red_is_idling(&q->vars)) + red_start_of_idle_period(&q->vars); return 0; } @@ -155,18 +159,21 @@ static void red_reset(struct Qdisc *sch) qdisc_reset(q->qdisc); sch->q.qlen = 0; - red_restart(&q->parms); + red_restart(&q->vars); } static void red_destroy(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); + + del_timer_sync(&q->adapt_timer); qdisc_destroy(q->qdisc); } static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, + [TCA_RED_MAX_P] = { .type = NLA_U32 }, }; static int red_change(struct Qdisc *sch, struct nlattr *opt) @@ -176,6 +183,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) struct tc_red_qopt *ctl; struct Qdisc *child = NULL; int err; + u32 max_P; if (opt == NULL) return -EINVAL; @@ -188,6 +196,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) tb[TCA_RED_STAB] == NULL) return -EINVAL; + max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; + ctl = nla_data(tb[TCA_RED_PARMS]); if (ctl->limit > 0) { @@ -205,22 +215,42 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc = child; } - red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, - ctl->Plog, ctl->Scell_log, - nla_data(tb[TCA_RED_STAB])); + red_set_parms(&q->parms, + ctl->qth_min, ctl->qth_max, ctl->Wlog, + ctl->Plog, ctl->Scell_log, + nla_data(tb[TCA_RED_STAB]), + max_P); + red_set_vars(&q->vars); - if (skb_queue_empty(&sch->q)) - red_end_of_idle_period(&q->parms); + del_timer(&q->adapt_timer); + if (ctl->flags & TC_RED_ADAPTATIVE) + mod_timer(&q->adapt_timer, jiffies + HZ/2); + + if (!q->qdisc->q.qlen) + red_start_of_idle_period(&q->vars); sch_tree_unlock(sch); return 0; } +static inline void red_adaptative_timer(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc *)arg; + struct red_sched_data *q = qdisc_priv(sch); + spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + + spin_lock(root_lock); + red_adaptative_algo(&q->parms, &q->vars); + mod_timer(&q->adapt_timer, jiffies + HZ/2); + spin_unlock(root_lock); +} + static int red_init(struct Qdisc *sch, struct nlattr *opt) { struct red_sched_data *q = qdisc_priv(sch); q->qdisc = &noop_qdisc; + setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch); return red_change(sch, opt); } @@ -243,6 +273,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); + NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P); return nla_nest_end(skb, opts); nla_put_failure: diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0a833d0c1f61..96e42cae4c7a 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -26,6 +26,7 @@ #include <net/ip.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> +#include <net/flow_keys.h> /* * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) @@ -286,6 +287,13 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) u32 minqlen = ~0; u32 r, slot, salt, sfbhash; int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + struct flow_keys keys; + + if (unlikely(sch->q.qlen >= q->limit)) { + sch->qstats.overlimits++; + q->stats.queuedrop++; + goto drop; + } if (q->rehash_interval > 0) { unsigned long limit = q->rehash_time + q->rehash_interval; @@ -303,13 +311,19 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* If using external classifiers, get result and record it. */ if (!sfb_classify(skb, q, &ret, &salt)) goto other_drop; + keys.src = salt; + keys.dst = 0; + keys.ports = 0; } else { - salt = skb_get_rxhash(skb); + skb_flow_dissect(skb, &keys); } slot = q->slot; - sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + sfbhash = jhash_3words((__force u32)keys.dst, + (__force u32)keys.src, + (__force u32)keys.ports, + q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; @@ -332,19 +346,19 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) slot ^= 1; sfb_skb_cb(skb)->hashes[slot] = 0; - if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) { + if (unlikely(minqlen >= q->max)) { sch->qstats.overlimits++; - if (minqlen >= q->max) - q->stats.bucketdrop++; - else - q->stats.queuedrop++; + q->stats.bucketdrop++; goto drop; } if (unlikely(p_min >= SFB_MAX_PROB)) { /* Inelastic flow */ if (q->double_buffering) { - sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + sfbhash = jhash_3words((__force u32)keys.dst, + (__force u32)keys.src, + (__force u32)keys.ports, + q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 4f5510e2bd6f..67494aef9acf 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -17,14 +17,14 @@ #include <linux/in.h> #include <linux/errno.h> #include <linux/init.h> -#include <linux/ipv6.h> #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/slab.h> #include <linux/vmalloc.h> -#include <net/ip.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/flow_keys.h> +#include <net/red.h> /* Stochastic Fairness Queuing algorithm. @@ -67,16 +67,18 @@ SFQ is superior for this purpose. IMPLEMENTATION: - This implementation limits maximal queue length to 128; - max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024. - The only goal of this restrictions was that all data - fit into one 4K page on 32bit arches. + This implementation limits : + - maximal queue length per flow to 127 packets. + - max mtu to 2^18-1; + - max 65408 flows, + - number of hash buckets to 65536. It is easy to increase these values, but not in flight. */ -#define SFQ_DEPTH 128 /* max number of packets per flow */ -#define SFQ_SLOTS 128 /* max number of flows */ -#define SFQ_EMPTY_SLOT 255 +#define SFQ_MAX_DEPTH 127 /* max number of packets per flow */ +#define SFQ_DEFAULT_FLOWS 128 +#define SFQ_MAX_FLOWS (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */ +#define SFQ_EMPTY_SLOT 0xffff #define SFQ_DEFAULT_HASH_DIVISOR 1024 /* We use 16 bits to store allot, and want to handle packets up to 64K @@ -85,13 +87,13 @@ #define SFQ_ALLOT_SHIFT 3 #define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) -/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */ -typedef unsigned char sfq_index; +/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ +typedef u16 sfq_index; /* * We dont use pointers to save space. - * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array - * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] + * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array + * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH] * are 'pointers' to dep[] array */ struct sfq_head { @@ -103,28 +105,45 @@ struct sfq_slot { struct sk_buff *skblist_next; struct sk_buff *skblist_prev; sfq_index qlen; /* number of skbs in skblist */ - sfq_index next; /* next slot in sfq chain */ + sfq_index next; /* next slot in sfq RR chain */ struct sfq_head dep; /* anchor in dep[] chains */ unsigned short hash; /* hash value (index in ht[]) */ short allot; /* credit for this slot */ + + unsigned int backlog; + struct red_vars vars; }; struct sfq_sched_data { -/* Parameters */ - int perturb_period; - unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ - int limit; +/* frequently used fields */ + int limit; /* limit of total number of packets in this qdisc */ unsigned int divisor; /* number of slots in hash table */ -/* Variables */ - struct tcf_proto *filter_list; - struct timer_list perturb_timer; + u8 headdrop; + u8 maxdepth; /* limit of packets per flow */ + u32 perturbation; - sfq_index cur_depth; /* depth of longest slot */ + u8 cur_depth; /* depth of longest slot */ + u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ + struct tcf_proto *filter_list; + sfq_index *ht; /* Hash table ('divisor' slots) */ + struct sfq_slot *slots; /* Flows table ('maxflows' entries) */ + + struct red_parms *red_parms; + struct tc_sfqred_stats stats; struct sfq_slot *tail; /* current slot in round */ - sfq_index *ht; /* Hash table (divisor slots) */ - struct sfq_slot slots[SFQ_SLOTS]; - struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ + + struct sfq_head dep[SFQ_MAX_DEPTH + 1]; + /* Linked lists of slots, indexed by depth + * dep[0] : list of unused flows + * dep[1] : list of flows with 1 packet + * dep[X] : list of flows with X packets + */ + + unsigned int maxflows; /* number of flows in flows array */ + int perturb_period; + unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ + struct timer_list perturb_timer; }; /* @@ -132,66 +151,36 @@ struct sfq_sched_data { */ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) { - if (val < SFQ_SLOTS) + if (val < SFQ_MAX_FLOWS) return &q->slots[val].dep; - return &q->dep[val - SFQ_SLOTS]; + return &q->dep[val - SFQ_MAX_FLOWS]; } -static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) +/* + * In order to be able to quickly rehash our queue when timer changes + * q->perturbation, we store flow_keys in skb->cb[] + */ +struct sfq_skb_cb { + struct flow_keys keys; +}; + +static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb) { - return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1); + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb)); + return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data; } -static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) +static unsigned int sfq_hash(const struct sfq_sched_data *q, + const struct sk_buff *skb) { - u32 h, h2; - - switch (skb->protocol) { - case htons(ETH_P_IP): - { - const struct iphdr *iph; - int poff; - - if (!pskb_network_may_pull(skb, sizeof(*iph))) - goto err; - iph = ip_hdr(skb); - h = (__force u32)iph->daddr; - h2 = (__force u32)iph->saddr ^ iph->protocol; - if (ip_is_fragment(iph)) - break; - poff = proto_ports_offset(iph->protocol); - if (poff >= 0 && - pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { - iph = ip_hdr(skb); - h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff); - } - break; - } - case htons(ETH_P_IPV6): - { - const struct ipv6hdr *iph; - int poff; - - if (!pskb_network_may_pull(skb, sizeof(*iph))) - goto err; - iph = ipv6_hdr(skb); - h = (__force u32)iph->daddr.s6_addr32[3]; - h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; - poff = proto_ports_offset(iph->nexthdr); - if (poff >= 0 && - pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) { - iph = ipv6_hdr(skb); - h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff); - } - break; - } - default: -err: - h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol; - h2 = (unsigned long)skb->sk; - } + const struct flow_keys *keys = &sfq_skb_cb(skb)->keys; + unsigned int hash; - return sfq_fold_hash(q, h, h2); + hash = jhash_3words((__force u32)keys->dst, + (__force u32)keys->src ^ keys->ip_proto, + (__force u32)keys->ports, q->perturbation); + return hash & (q->divisor - 1); } static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, @@ -206,8 +195,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, TC_H_MIN(skb->priority) <= q->divisor) return TC_H_MIN(skb->priority); - if (!q->filter_list) + if (!q->filter_list) { + skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys); return sfq_hash(q, skb) + 1; + } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; result = tc_classify(skb, q->filter_list, &res); @@ -228,18 +219,19 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, } /* - * x : slot number [0 .. SFQ_SLOTS - 1] + * x : slot number [0 .. SFQ_MAX_FLOWS - 1] */ static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) { sfq_index p, n; - int qlen = q->slots[x].qlen; + struct sfq_slot *slot = &q->slots[x]; + int qlen = slot->qlen; - p = qlen + SFQ_SLOTS; + p = qlen + SFQ_MAX_FLOWS; n = q->dep[qlen].next; - q->slots[x].dep.next = n; - q->slots[x].dep.prev = p; + slot->dep.next = n; + slot->dep.prev = p; q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ sfq_dep_head(q, n)->prev = x; @@ -304,6 +296,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot) static inline void slot_queue_init(struct sfq_slot *slot) { + memset(slot, 0, sizeof(*slot)); slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; } @@ -334,8 +327,9 @@ static unsigned int sfq_drop(struct Qdisc *sch) x = q->dep[d].next; slot = &q->slots[x]; drop: - skb = slot_dequeue_tail(slot); + skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); len = qdisc_pkt_len(skb); + slot->backlog -= len; sfq_dec(q, x); kfree_skb(skb); sch->q.qlen--; @@ -356,6 +350,23 @@ drop: return 0; } +/* Is ECN parameter configured */ +static int sfq_prob_mark(const struct sfq_sched_data *q) +{ + return q->flags & TC_RED_ECN; +} + +/* Should packets over max threshold just be marked */ +static int sfq_hard_mark(const struct sfq_sched_data *q) +{ + return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN; +} + +static int sfq_headdrop(const struct sfq_sched_data *q) +{ + return q->headdrop; +} + static int sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { @@ -364,6 +375,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) sfq_index x, qlen; struct sfq_slot *slot; int uninitialized_var(ret); + struct sk_buff *head; + int delta; hash = sfq_classify(skb, sch, &ret); if (hash == 0) { @@ -378,28 +391,91 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) slot = &q->slots[x]; if (x == SFQ_EMPTY_SLOT) { x = q->dep[0].next; /* get a free slot */ + if (x >= SFQ_MAX_FLOWS) + return qdisc_drop(skb, sch); q->ht[hash] = x; slot = &q->slots[x]; slot->hash = hash; + slot->backlog = 0; /* should already be 0 anyway... */ + red_set_vars(&slot->vars); + goto enqueue; } + if (q->red_parms) { + slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms, + &slot->vars, + slot->backlog); + switch (red_action(q->red_parms, + &slot->vars, + slot->vars.qavg)) { + case RED_DONT_MARK: + break; - /* If selected queue has length q->limit, do simple tail drop, - * i.e. drop _this_ packet. - */ - if (slot->qlen >= q->limit) - return qdisc_drop(skb, sch); + case RED_PROB_MARK: + sch->qstats.overlimits++; + if (sfq_prob_mark(q)) { + /* We know we have at least one packet in queue */ + if (sfq_headdrop(q) && + INET_ECN_set_ce(slot->skblist_next)) { + q->stats.prob_mark_head++; + break; + } + if (INET_ECN_set_ce(skb)) { + q->stats.prob_mark++; + break; + } + } + q->stats.prob_drop++; + goto congestion_drop; + + case RED_HARD_MARK: + sch->qstats.overlimits++; + if (sfq_hard_mark(q)) { + /* We know we have at least one packet in queue */ + if (sfq_headdrop(q) && + INET_ECN_set_ce(slot->skblist_next)) { + q->stats.forced_mark_head++; + break; + } + if (INET_ECN_set_ce(skb)) { + q->stats.forced_mark++; + break; + } + } + q->stats.forced_drop++; + goto congestion_drop; + } + } + + if (slot->qlen >= q->maxdepth) { +congestion_drop: + if (!sfq_headdrop(q)) + return qdisc_drop(skb, sch); + + /* We know we have at least one packet in queue */ + head = slot_dequeue_head(slot); + delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); + sch->qstats.backlog -= delta; + slot->backlog -= delta; + qdisc_drop(head, sch); + + slot_queue_add(slot, skb); + return NET_XMIT_CN; + } +enqueue: sch->qstats.backlog += qdisc_pkt_len(skb); + slot->backlog += qdisc_pkt_len(skb); slot_queue_add(slot, skb); sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */ slot->next = x; + q->tail = slot; } else { slot->next = q->tail->next; q->tail->next = x; } - q->tail = slot; + /* We could use a bigger initial quantum for new flows */ slot->allot = q->scaled_quantum; } if (++sch->q.qlen <= q->limit) @@ -443,7 +519,7 @@ next_slot: qdisc_bstats_update(sch, skb); sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); - + slot->backlog -= qdisc_pkt_len(skb); /* Is the slot empty? */ if (slot->qlen == 0) { q->ht[slot->hash] = SFQ_EMPTY_SLOT; @@ -468,12 +544,90 @@ sfq_reset(struct Qdisc *sch) kfree_skb(skb); } +/* + * When q->perturbation is changed, we rehash all queued skbs + * to avoid OOO (Out Of Order) effects. + * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change + * counters. + */ +static void sfq_rehash(struct Qdisc *sch) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb; + int i; + struct sfq_slot *slot; + struct sk_buff_head list; + int dropped = 0; + + __skb_queue_head_init(&list); + + for (i = 0; i < q->maxflows; i++) { + slot = &q->slots[i]; + if (!slot->qlen) + continue; + while (slot->qlen) { + skb = slot_dequeue_head(slot); + sfq_dec(q, i); + __skb_queue_tail(&list, skb); + } + slot->backlog = 0; + red_set_vars(&slot->vars); + q->ht[slot->hash] = SFQ_EMPTY_SLOT; + } + q->tail = NULL; + + while ((skb = __skb_dequeue(&list)) != NULL) { + unsigned int hash = sfq_hash(q, skb); + sfq_index x = q->ht[hash]; + + slot = &q->slots[x]; + if (x == SFQ_EMPTY_SLOT) { + x = q->dep[0].next; /* get a free slot */ + if (x >= SFQ_MAX_FLOWS) { +drop: sch->qstats.backlog -= qdisc_pkt_len(skb); + kfree_skb(skb); + dropped++; + continue; + } + q->ht[hash] = x; + slot = &q->slots[x]; + slot->hash = hash; + } + if (slot->qlen >= q->maxdepth) + goto drop; + slot_queue_add(slot, skb); + if (q->red_parms) + slot->vars.qavg = red_calc_qavg(q->red_parms, + &slot->vars, + slot->backlog); + slot->backlog += qdisc_pkt_len(skb); + sfq_inc(q, x); + if (slot->qlen == 1) { /* The flow is new */ + if (q->tail == NULL) { /* It is the first flow */ + slot->next = x; + } else { + slot->next = q->tail->next; + q->tail->next = x; + } + q->tail = slot; + slot->allot = q->scaled_quantum; + } + } + sch->q.qlen -= dropped; + qdisc_tree_decrease_qlen(sch, dropped); +} + static void sfq_perturbation(unsigned long arg) { struct Qdisc *sch = (struct Qdisc *)arg; struct sfq_sched_data *q = qdisc_priv(sch); + spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spin_lock(root_lock); q->perturbation = net_random(); + if (!q->filter_list && q->tail) + sfq_rehash(sch); + spin_unlock(root_lock); if (q->perturb_period) mod_timer(&q->perturb_timer, jiffies + q->perturb_period); @@ -483,23 +637,54 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = nla_data(opt); + struct tc_sfq_qopt_v1 *ctl_v1 = NULL; unsigned int qlen; + struct red_parms *p = NULL; if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; - + if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1))) + ctl_v1 = nla_data(opt); if (ctl->divisor && (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) return -EINVAL; - + if (ctl_v1 && ctl_v1->qth_min) { + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + } sch_tree_lock(sch); - q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); - q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); + if (ctl->quantum) { + q->quantum = ctl->quantum; + q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); + } q->perturb_period = ctl->perturb_period * HZ; - if (ctl->limit) - q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); - if (ctl->divisor) + if (ctl->flows) + q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + if (ctl->divisor) { q->divisor = ctl->divisor; + q->maxflows = min_t(u32, q->maxflows, q->divisor); + } + if (ctl_v1) { + if (ctl_v1->depth) + q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); + if (p) { + swap(q->red_parms, p); + red_set_parms(q->red_parms, + ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog, + ctl_v1->Plog, ctl_v1->Scell_log, + NULL, + ctl_v1->max_P); + } + q->flags = ctl_v1->flags; + q->headdrop = ctl_v1->headdrop; + } + if (ctl->limit) { + q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); + q->maxflows = min_t(u32, q->maxflows, q->limit); + } + qlen = sch->q.qlen; while (sch->q.qlen > q->limit) sfq_drop(sch); @@ -511,49 +696,82 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) q->perturbation = net_random(); } sch_tree_unlock(sch); + kfree(p); return 0; } +static void *sfq_alloc(size_t sz) +{ + void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN); + + if (!ptr) + ptr = vmalloc(sz); + return ptr; +} + +static void sfq_free(void *addr) +{ + if (addr) { + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); + } +} + +static void sfq_destroy(struct Qdisc *sch) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + q->perturb_period = 0; + del_timer_sync(&q->perturb_timer); + sfq_free(q->ht); + sfq_free(q->slots); + kfree(q->red_parms); +} + static int sfq_init(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); - size_t sz; int i; q->perturb_timer.function = sfq_perturbation; q->perturb_timer.data = (unsigned long)sch; init_timer_deferrable(&q->perturb_timer); - for (i = 0; i < SFQ_DEPTH; i++) { - q->dep[i].next = i + SFQ_SLOTS; - q->dep[i].prev = i + SFQ_SLOTS; + for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { + q->dep[i].next = i + SFQ_MAX_FLOWS; + q->dep[i].prev = i + SFQ_MAX_FLOWS; } - q->limit = SFQ_DEPTH - 1; + q->limit = SFQ_MAX_DEPTH; + q->maxdepth = SFQ_MAX_DEPTH; q->cur_depth = 0; q->tail = NULL; q->divisor = SFQ_DEFAULT_HASH_DIVISOR; - if (opt == NULL) { - q->quantum = psched_mtu(qdisc_dev(sch)); - q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); - q->perturb_period = 0; - q->perturbation = net_random(); - } else { + q->maxflows = SFQ_DEFAULT_FLOWS; + q->quantum = psched_mtu(qdisc_dev(sch)); + q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); + q->perturb_period = 0; + q->perturbation = net_random(); + + if (opt) { int err = sfq_change(sch, opt); if (err) return err; } - sz = sizeof(q->ht[0]) * q->divisor; - q->ht = kmalloc(sz, GFP_KERNEL); - if (!q->ht && sz > PAGE_SIZE) - q->ht = vmalloc(sz); - if (!q->ht) + q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); + q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows); + if (!q->ht || !q->slots) { + sfq_destroy(sch); return -ENOMEM; + } for (i = 0; i < q->divisor; i++) q->ht[i] = SFQ_EMPTY_SLOT; - for (i = 0; i < SFQ_SLOTS; i++) { + for (i = 0; i < q->maxflows; i++) { slot_queue_init(&q->slots[i]); sfq_link(q, i); } @@ -564,31 +782,32 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) return 0; } -static void sfq_destroy(struct Qdisc *sch) -{ - struct sfq_sched_data *q = qdisc_priv(sch); - - tcf_destroy_chain(&q->filter_list); - q->perturb_period = 0; - del_timer_sync(&q->perturb_timer); - if (is_vmalloc_addr(q->ht)) - vfree(q->ht); - else - kfree(q->ht); -} - static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct sfq_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); - struct tc_sfq_qopt opt; - - opt.quantum = q->quantum; - opt.perturb_period = q->perturb_period / HZ; - - opt.limit = q->limit; - opt.divisor = q->divisor; - opt.flows = q->limit; + struct tc_sfq_qopt_v1 opt; + struct red_parms *p = q->red_parms; + + memset(&opt, 0, sizeof(opt)); + opt.v0.quantum = q->quantum; + opt.v0.perturb_period = q->perturb_period / HZ; + opt.v0.limit = q->limit; + opt.v0.divisor = q->divisor; + opt.v0.flows = q->maxflows; + opt.depth = q->maxdepth; + opt.headdrop = q->headdrop; + + if (p) { + opt.qth_min = p->qth_min >> p->Wlog; + opt.qth_max = p->qth_max >> p->Wlog; + opt.Wlog = p->Wlog; + opt.Plog = p->Plog; + opt.Scell_log = p->Scell_log; + opt.max_P = p->max_P; + } + memcpy(&opt.stats, &q->stats, sizeof(opt.stats)); + opt.flags = q->flags; NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); @@ -644,15 +863,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, sfq_index idx = q->ht[cl - 1]; struct gnet_stats_queue qs = { 0 }; struct tc_sfq_xstats xstats = { 0 }; - struct sk_buff *skb; if (idx != SFQ_EMPTY_SLOT) { const struct sfq_slot *slot = &q->slots[idx]; xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; qs.qlen = slot->qlen; - slot_queue_walk(slot, skb) - qs.backlog += qdisc_pkt_len(skb); + qs.backlog = slot->backlog; } if (gnet_stats_copy_queue(d, &qs) < 0) return -1; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 1dcfb5223a86..b8e156319d7b 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -346,6 +346,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_tbf_qopt opt; + sch->qstats.backlog = q->qdisc->qstats.backlog; nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index a3b7120fcc74..45326599fda3 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -225,11 +225,11 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int -__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) +__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, + struct net_device *dev, struct netdev_queue *txq, + struct neighbour *mn) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); - struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); - struct neighbour *mn = dst_get_neighbour(skb_dst(skb)); + struct teql_sched_data *q = qdisc_priv(txq->qdisc); struct neighbour *n = q->ncache; if (mn->tbl == NULL) @@ -262,17 +262,26 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } static inline int teql_resolve(struct sk_buff *skb, - struct sk_buff *skb_res, struct net_device *dev) + struct sk_buff *skb_res, + struct net_device *dev, + struct netdev_queue *txq) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); + struct dst_entry *dst = skb_dst(skb); + struct neighbour *mn; + int res; + if (txq->qdisc == &noop_qdisc) return -ENODEV; - if (dev->header_ops == NULL || - skb_dst(skb) == NULL || - dst_get_neighbour(skb_dst(skb)) == NULL) + if (!dev->header_ops || !dst) return 0; - return __teql_resolve(skb, skb_res, dev); + + rcu_read_lock(); + mn = dst_get_neighbour_noref(dst); + res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0; + rcu_read_unlock(); + + return res; } static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) @@ -301,18 +310,18 @@ restart: if (slave_txq->qdisc_sleeping != q) continue; - if (__netif_subqueue_stopped(slave, subq) || + if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) || !netif_running(slave)) { busy = 1; continue; } - switch (teql_resolve(skb, skb_res, slave)) { + switch (teql_resolve(skb, skb_res, slave, slave_txq)) { case 0: if (__netif_tx_trylock(slave_txq)) { unsigned int length = qdisc_pkt_len(skb); - if (!netif_tx_queue_frozen_or_stopped(slave_txq) && + if (!netif_xmit_frozen_or_stopped(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); @@ -324,7 +333,7 @@ restart: } __netif_tx_unlock(slave_txq); } - if (netif_queue_stopped(dev)) + if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0))) busy = 1; break; case 1: diff --git a/net/sctp/associola.c b/net/sctp/associola.c index dc16b90ddb6f..acd2edbc073e 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -173,7 +173,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - (unsigned long)sp->autoclose * HZ; + min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -282,6 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.asconf_capable = 1; asoc->asconf_addr_del_pending = NULL; asoc->src_out_of_asoc_ok = 0; + asoc->new_transport = NULL; /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 865e68fef21c..bf812048cf6f 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -82,7 +82,7 @@ static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp) struct sctp_auth_bytes *key; /* Verify that we are not going to overflow INT_MAX */ - if ((INT_MAX - key_len) < sizeof(struct sctp_auth_bytes)) + if (key_len > (INT_MAX - sizeof(struct sctp_auth_bytes))) return NULL; /* Allocate the shared key */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index c8cc24e282c3..68a385d7c3bd 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -415,7 +415,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) sctp_subtype_t subtype; sctp_state_t state; int error = 0; - int first_time = 1; /* is this the first time through the looop */ + int first_time = 1; /* is this the first time through the loop */ if (ep->base.dead) return; diff --git a/net/sctp/input.c b/net/sctp/input.c index b7692aab6e9c..80f71af71384 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -105,7 +105,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb) struct sctp_input_cb { union { struct inet_skb_parm h4; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct inet6_skb_parm h6; #endif } header; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index aabaee41dd3e..91f479121c55 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -107,7 +107,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; - ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifa->addr); + addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; spin_lock_bh(&sctp_local_addr_lock); @@ -219,8 +219,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) /* Fill in the dest address from the route entry passed with the skb * and the source address from the transport. */ - ipv6_addr_copy(&fl6.daddr, &transport->ipaddr.v6.sin6_addr); - ipv6_addr_copy(&fl6.saddr, &transport->saddr.v6.sin6_addr); + fl6.daddr = transport->ipaddr.v6.sin6_addr; + fl6.saddr = transport->saddr.v6.sin6_addr; fl6.flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl6.flowlabel); @@ -231,7 +231,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&fl6.daddr, rt0->addr); + fl6.daddr = *rt0->addr; } SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", @@ -243,7 +243,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; - return ip6_xmit(sk, skb, &fl6, np->opt); + return ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); } /* Returns the dst cache entry for the given source and destination ip @@ -265,7 +265,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, sctp_scope_t scope; memset(fl6, 0, sizeof(struct flowi6)); - ipv6_addr_copy(&fl6->daddr, &daddr->v6.sin6_addr); + fl6->daddr = daddr->v6.sin6_addr; fl6->fl6_dport = daddr->v6.sin6_port; fl6->flowi6_proto = IPPROTO_SCTP; if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) @@ -277,7 +277,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, fl6->fl6_sport = htons(asoc->base.bind_addr.port); if (saddr) { - ipv6_addr_copy(&fl6->saddr, &saddr->v6.sin6_addr); + fl6->saddr = saddr->v6.sin6_addr; fl6->fl6_sport = saddr->v6.sin6_port; SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr); } @@ -334,7 +334,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, } rcu_read_unlock(); if (baddr) { - ipv6_addr_copy(&fl6->saddr, &baddr->v6.sin6_addr); + fl6->saddr = baddr->v6.sin6_addr; fl6->fl6_sport = baddr->v6.sin6_port; dst = ip6_dst_lookup_flow(sk, fl6, NULL, false); } @@ -375,7 +375,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, if (t->dst) { saddr->v6.sin6_family = AF_INET6; - ipv6_addr_copy(&saddr->v6.sin6_addr, &fl6->saddr); + saddr->v6.sin6_addr = fl6->saddr; } } @@ -400,7 +400,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; - ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr); + addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; INIT_LIST_HEAD(&addr->list); @@ -416,7 +416,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb, int is_saddr) { - void *from; __be16 *port; struct sctphdr *sh; @@ -428,12 +427,11 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb, sh = sctp_hdr(skb); if (is_saddr) { *port = sh->source; - from = &ipv6_hdr(skb)->saddr; + addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; } else { *port = sh->dest; - from = &ipv6_hdr(skb)->daddr; + addr->v6.sin6_addr = ipv6_hdr(skb)->daddr; } - ipv6_addr_copy(&addr->v6.sin6_addr, from); } /* Initialize an sctp_addr from a socket. */ @@ -441,7 +439,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = 0; - ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr); + addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ @@ -454,7 +452,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) inet6_sk(sk)->rcv_saddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr); + inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr; } } @@ -467,7 +465,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff); inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr); + inet6_sk(sk)->daddr = addr->v6.sin6_addr; } } @@ -479,7 +477,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr, addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = port; addr->v6.sin6_flowinfo = 0; /* BUG */ - ipv6_addr_copy(&addr->v6.sin6_addr, ¶m->v6.addr); + addr->v6.sin6_addr = param->v6.addr; addr->v6.sin6_scope_id = iif; } @@ -493,7 +491,7 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr, param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS; param->v6.param_hdr.length = htons(length); - ipv6_addr_copy(¶m->v6.addr, &addr->v6.sin6_addr); + param->v6.addr = addr->v6.sin6_addr; return length; } @@ -504,7 +502,7 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, { addr->sa.sa_family = AF_INET6; addr->v6.sin6_port = port; - ipv6_addr_copy(&addr->v6.sin6_addr, saddr); + addr->v6.sin6_addr = *saddr; } /* Compare addresses exactly. @@ -759,7 +757,7 @@ static void sctp_inet6_event_msgname(struct sctp_ulpevent *event, } sin6from = &asoc->peer.primary_addr.v6; - ipv6_addr_copy(&sin6->sin6_addr, &sin6from->sin6_addr); + sin6->sin6_addr = sin6from->sin6_addr; if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6->sin6_scope_id = sin6from->sin6_scope_id; } @@ -787,7 +785,7 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname, } /* Otherwise, just copy the v6 address. */ - ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr); + sin6->sin6_addr = ipv6_hdr(skb)->saddr; if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) { struct sctp_ulpevent *ev = sctp_skb2event(skb); sin6->sin6_scope_id = ev->iif; diff --git a/net/sctp/output.c b/net/sctp/output.c index 08b3cead6503..817174eb5f41 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -697,13 +697,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet, /* Keep track of how many bytes are in flight to the receiver. */ asoc->outqueue.outstanding_bytes += datasize; - /* Update our view of the receiver's rwnd. Include sk_buff overhead - * while updating peer.rwnd so that it reduces the chances of a - * receiver running out of receive buffer space even when receive - * window is still open. This can happen when a sender is sending - * sending small messages. - */ - datasize += sizeof(struct sk_buff); + /* Update our view of the receiver's rwnd. */ if (datasize < rwnd) rwnd -= datasize; else diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a6d27bf563a5..cfeb1d4a1ee6 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -411,8 +411,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, chunk->transport->flight_size -= sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); - q->asoc->peer.rwnd += (sctp_data_size(chunk) + - sizeof(struct sk_buff)); + q->asoc->peer.rwnd += sctp_data_size(chunk); } continue; } @@ -432,8 +431,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * (Section 7.2.4)), add the data size of those * chunks to the rwnd. */ - q->asoc->peer.rwnd += (sctp_data_size(chunk) + - sizeof(struct sk_buff)); + q->asoc->peer.rwnd += sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); if (chunk->transport) transport->flight_size -= sctp_data_size(chunk); @@ -917,6 +915,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) * current cwnd). */ if (!list_empty(&q->retransmit)) { + if (asoc->peer.retran_path->state == SCTP_UNCONFIRMED) + goto sctp_flush_out; if (transport == asoc->peer.retran_path) goto retran; @@ -989,6 +989,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) ((new_transport->state == SCTP_INACTIVE) || (new_transport->state == SCTP_UNCONFIRMED))) new_transport = asoc->peer.active_path; + if (new_transport->state == SCTP_UNCONFIRMED) + continue; /* Change packets if necessary. */ if (new_transport != transport) { diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 05a6ce214714..1e2eee88c3ea 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -37,6 +37,7 @@ #include <linux/types.h> #include <linux/seq_file.h> #include <linux/init.h> +#include <linux/export.h> #include <net/sctp/sctp.h> #include <net/ip.h> /* for snmp_fold_field */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 91784f44a2e2..5942d27b1444 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -637,7 +637,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg) " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state, addrw); -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /* Now we send an ASCONF for each association */ /* Note. we currently don't handle link local IPv6 addressees */ if (addrw->a.sa.sa_family == AF_INET6) { @@ -1285,6 +1285,9 @@ SCTP_STATIC __init int sctp_init(void) sctp_max_instreams = SCTP_DEFAULT_INSTREAMS; sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS; + /* Initialize maximum autoclose timeout. */ + sctp_max_autoclose = INT_MAX / HZ; + /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); @@ -1299,7 +1302,7 @@ SCTP_STATIC __init int sctp_init(void) max_share = min(4UL*1024*1024, limit); sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ - sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); + sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 81db4e385352..a85eeeb55dd0 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3015,6 +3015,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, /* Start the heartbeat timer. */ if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer))) sctp_transport_hold(peer); + asoc->new_transport = peer; break; case SCTP_PARAM_DEL_IP: /* ADDIP 4.3 D7) If a request is received to delete the @@ -3399,8 +3400,10 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, asconf_len -= length; } - if (no_err && asoc->src_out_of_asoc_ok) + if (no_err && asoc->src_out_of_asoc_ok) { asoc->src_out_of_asoc_ok = 0; + sctp_transport_immediate_rtx(asoc->peer.primary_path); + } /* Free the cached last sent asconf chunk. */ list_del_init(&asconf->transmitted_list); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 76388b083f28..1ff51c9d18d5 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -666,6 +666,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, struct sctp_chunk *chunk) { sctp_sender_hb_info_t *hbinfo; + int was_unconfirmed = 0; /* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the * HEARTBEAT should clear the error counter of the destination @@ -692,9 +693,11 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* Mark the destination transport address as active if it is not so * marked. */ - if ((t->state == SCTP_INACTIVE) || (t->state == SCTP_UNCONFIRMED)) + if ((t->state == SCTP_INACTIVE) || (t->state == SCTP_UNCONFIRMED)) { + was_unconfirmed = 1; sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, SCTP_HEARTBEAT_SUCCESS); + } /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address @@ -712,6 +715,9 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* Update the heartbeat timer. */ if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) sctp_transport_hold(t); + + if (was_unconfirmed && asoc->peer.transport_count == 1) + sctp_transport_immediate_rtx(t); } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a0f31e6c1c63..891f5db8cc31 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3618,6 +3618,11 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, */ asconf_ack->dest = chunk->source; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); + if (asoc->new_transport) { + sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, + commands); + ((struct sctp_association *)asoc)->new_transport = NULL; + } return SCTP_DISPOSITION_CONSUME; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 836aa63ee121..408ebd0e7330 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -78,6 +78,7 @@ #include <net/inet_common.h> #include <linux/socket.h> /* for sa_family_t */ +#include <linux/export.h> #include <net/sock.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> @@ -803,7 +804,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addrs; - ipv6_addr_copy(&asoc->asconf_addr_del_pending->v6.sin6_addr, &sin6->sin6_addr); + asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr; } SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ", " at %p\n", asoc, asoc->asconf_addr_del_pending, @@ -2199,8 +2200,6 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, return -EINVAL; if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; - /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */ - sp->autoclose = min_t(long, sp->autoclose, MAX_SCHEDULE_TIMEOUT / HZ); return 0; } @@ -6840,7 +6839,7 @@ struct proto sctp_prot = { .sockets_allocated = &sctp_sockets_allocated, }; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct proto sctpv6_prot = { .name = "SCTPv6", @@ -6871,4 +6870,4 @@ struct proto sctpv6_prot = { .memory_allocated = &sctp_memory_allocated, .sockets_allocated = &sctp_sockets_allocated, }; -#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ +#endif /* IS_ENABLED(CONFIG_IPV6) */ diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 6b3952961b85..60ffbd067ff7 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -53,6 +53,10 @@ static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ static int rwnd_scale_max = 16; +static unsigned long max_autoclose_min = 0; +static unsigned long max_autoclose_max = + (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) + ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -258,6 +262,15 @@ static ctl_table sctp_table[] = { .extra1 = &one, .extra2 = &rwnd_scale_max, }, + { + .procname = "max_autoclose", + .data = &sctp_max_autoclose, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &max_autoclose_min, + .extra2 = &max_autoclose_max, + }, { /* sentinel */ } }; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 394c57ca2f54..3889330b7b04 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -641,3 +641,19 @@ void sctp_transport_reset(struct sctp_transport *t) t->cacc.next_tsn_at_change = 0; t->cacc.cacc_saw_newack = 0; } + +/* Schedule retransmission on the given transport */ +void sctp_transport_immediate_rtx(struct sctp_transport *t) +{ + /* Stop pending T3_rtx_timer */ + if (timer_pending(&t->T3_rtx_timer)) { + (void)del_timer(&t->T3_rtx_timer); + sctp_transport_put(t); + } + sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX); + if (!timer_pending(&t->T3_rtx_timer)) { + if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto)) + sctp_transport_hold(t); + } + return; +} diff --git a/net/socket.c b/net/socket.c index ffe92ca32f2a..28a96af484b4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -538,6 +538,8 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_HW_TSTAMP; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) *tx_flags |= SKBTX_SW_TSTAMP; + if (sock_flag(sk, SOCK_WIFI_STATUS)) + *tx_flags |= SKBTX_WIFI_STATUS; return 0; } EXPORT_SYMBOL(sock_tx_timestamp); @@ -549,6 +551,8 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, sock_update_classid(sock->sk); + sock_update_netprioidx(sock->sk); + si->sock = sock; si->scm = NULL; si->msg = msg; @@ -674,6 +678,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_timestamp); +void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + int ack; + + if (!sock_flag(sk, SOCK_WIFI_STATUS)) + return; + if (!skb->wifi_acked_valid) + return; + + ack = skb->wifi_acked; + + put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack); +} +EXPORT_SYMBOL_GPL(__sock_recv_wifi_status); + static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { @@ -2500,7 +2520,7 @@ void sock_unregister(int family) BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); - rcu_assign_pointer(net_families[family], NULL); + RCU_INIT_POINTER(net_families[family], NULL); spin_unlock(&net_family_lock); synchronize_rcu(); @@ -2738,10 +2758,10 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) case ETHTOOL_GRXRINGS: case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: + case ETHTOOL_SRXCLSRLINS: convert_out = true; /* fall through */ case ETHTOOL_SRXCLSRLDEL: - case ETHTOOL_SRXCLSRLINS: buf_size += sizeof(struct ethtool_rxnfc); convert_in = true; break; @@ -2883,7 +2903,7 @@ static int bond_ioctl(struct net *net, unsigned int cmd, return dev_ioctl(net, cmd, uifr); default: - return -EINVAL; + return -ENOIOCTLCMD; } } @@ -3210,20 +3230,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, return sock_do_ioctl(net, sock, cmd, arg); } - /* Prevent warning from compat_sys_ioctl, these always - * result in -EINVAL in the native case anyway. */ - switch (cmd) { - case SIOCRTMSG: - case SIOCGIFCOUNT: - case SIOCSRARP: - case SIOCGRARP: - case SIOCDRARP: - case SIOCSIFLINK: - case SIOCGIFSLAVE: - case SIOCSIFSLAVE: - return -EINVAL; - } - return -ENOIOCTLCMD; } diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 4195233c4914..ee77742e0ed6 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -19,8 +19,9 @@ #include <net/ipv6.h> #include <linux/sunrpc/clnt.h> #include <linux/slab.h> +#include <linux/export.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, char *buf, const int buflen) @@ -90,7 +91,7 @@ static size_t rpc_ntop6(const struct sockaddr *sap, return len; } -#else /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ +#else /* !IS_ENABLED(CONFIG_IPV6) */ static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, char *buf, const int buflen) @@ -104,7 +105,7 @@ static size_t rpc_ntop6(const struct sockaddr *sap, return 0; } -#endif /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ +#endif /* !IS_ENABLED(CONFIG_IPV6) */ static int rpc_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) @@ -154,7 +155,7 @@ static size_t rpc_pton4(const char *buf, const size_t buflen, return sizeof(struct sockaddr_in); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int rpc_parse_scope_id(const char *buf, const size_t buflen, const char *delim, struct sockaddr_in6 *sin6) { @@ -255,12 +256,13 @@ EXPORT_SYMBOL_GPL(rpc_pton); /** * rpc_sockaddr2uaddr - Construct a universal address string from @sap. * @sap: socket address + * @gfp_flags: allocation mode * * Returns a %NUL-terminated string in dynamically allocated memory; * otherwise NULL is returned if an error occurred. Caller must * free the returned string. */ -char *rpc_sockaddr2uaddr(const struct sockaddr *sap) +char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) { char portbuf[RPCBIND_MAXUADDRPLEN]; char addrbuf[RPCBIND_MAXUADDRLEN]; @@ -288,9 +290,8 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap) if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) return NULL; - return kstrdup(addrbuf, GFP_KERNEL); + return kstrdup(addrbuf, gfp_flags); } -EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); /** * rpc_uaddr2sockaddr - convert a universal address to a socket address. diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index e010a015d996..1426ec3d0a53 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -41,15 +41,17 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred); /* * Public call interface for looking up machine creds. */ -struct rpc_cred *rpc_lookup_machine_cred(void) +struct rpc_cred *rpc_lookup_machine_cred(const char *service_name) { struct auth_cred acred = { .uid = RPC_MACHINE_CRED_USERID, .gid = RPC_MACHINE_CRED_GROUPID, + .principal = service_name, .machine_cred = 1, }; - dprintk("RPC: looking up machine cred\n"); + dprintk("RPC: looking up machine cred for service %s\n", + service_name); return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0); } EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 364eb45e989d..affa631ac1ab 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -392,7 +392,8 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) } static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, - struct rpc_clnt *clnt, int machine_cred) + struct rpc_clnt *clnt, + const char *service_name) { struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; @@ -407,12 +408,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, p += len; gss_msg->msg.len += len; } - if (machine_cred) { - len = sprintf(p, "service=* "); - p += len; - gss_msg->msg.len += len; - } else if (!strcmp(clnt->cl_program->name, "nfs4_cb")) { - len = sprintf(p, "service=nfs "); + if (service_name != NULL) { + len = sprintf(p, "service=%s ", service_name); p += len; gss_msg->msg.len += len; } @@ -429,17 +426,18 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, } static void gss_encode_msg(struct gss_upcall_msg *gss_msg, - struct rpc_clnt *clnt, int machine_cred) + struct rpc_clnt *clnt, + const char *service_name) { if (pipe_version == 0) gss_encode_v0_msg(gss_msg); else /* pipe_version == 1 */ - gss_encode_v1_msg(gss_msg, clnt, machine_cred); + gss_encode_v1_msg(gss_msg, clnt, service_name); } -static inline struct gss_upcall_msg * -gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid, struct rpc_clnt *clnt, - int machine_cred) +static struct gss_upcall_msg * +gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, + uid_t uid, const char *service_name) { struct gss_upcall_msg *gss_msg; int vers; @@ -459,7 +457,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid, struct rpc_clnt *clnt, atomic_set(&gss_msg->count, 1); gss_msg->uid = uid; gss_msg->auth = gss_auth; - gss_encode_msg(gss_msg, clnt, machine_cred); + gss_encode_msg(gss_msg, clnt, service_name); return gss_msg; } @@ -471,7 +469,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr struct gss_upcall_msg *gss_new, *gss_msg; uid_t uid = cred->cr_uid; - gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred); + gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal); if (IS_ERR(gss_new)) return gss_new; gss_msg = gss_add_msg(gss_new); @@ -603,26 +601,6 @@ out: return err; } -static ssize_t -gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, - char __user *dst, size_t buflen) -{ - char *data = (char *)msg->data + msg->copied; - size_t mlen = min(msg->len, buflen); - unsigned long left; - - left = copy_to_user(dst, data, mlen); - if (left == mlen) { - msg->errno = -EFAULT; - return -EFAULT; - } - - mlen -= left; - msg->copied += mlen; - msg->errno = 0; - return mlen; -} - #define MSG_BUF_MAXSIZE 1024 static ssize_t @@ -970,7 +948,7 @@ gss_destroy_nullcred(struct rpc_cred *cred) struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = gss_cred->gc_ctx; - rcu_assign_pointer(gss_cred->gc_ctx, NULL); + RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) gss_put_ctx(ctx); @@ -1015,7 +993,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) */ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; - cred->gc_machine_cred = acred->machine_cred; + cred->gc_principal = NULL; + if (acred->machine_cred) + cred->gc_principal = acred->principal; kref_get(&gss_auth->kref); return &cred->gc_base; @@ -1050,7 +1030,12 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags)) return 0; out: - if (acred->machine_cred != gss_cred->gc_machine_cred) + if (acred->principal != NULL) { + if (gss_cred->gc_principal == NULL) + return 0; + return strcmp(acred->principal, gss_cred->gc_principal) == 0; + } + if (gss_cred->gc_principal != NULL) return 0; return rc->cr_uid == acred->uid; } @@ -1124,7 +1109,8 @@ static int gss_renew_cred(struct rpc_task *task) struct rpc_auth *auth = oldcred->cr_auth; struct auth_cred acred = { .uid = oldcred->cr_uid, - .machine_cred = gss_cred->gc_machine_cred, + .principal = gss_cred->gc_principal, + .machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0), }; struct rpc_cred *new; @@ -1590,7 +1576,7 @@ static const struct rpc_credops gss_nullops = { }; static const struct rpc_pipe_ops gss_upcall_ops_v0 = { - .upcall = gss_pipe_upcall, + .upcall = rpc_pipe_generic_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v0, @@ -1598,7 +1584,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = { }; static const struct rpc_pipe_ops gss_upcall_ops_v1 = { - .upcall = gss_pipe_upcall, + .upcall = rpc_pipe_generic_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v1, diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4cb70dc6e7ad..e50502d8ceb7 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -129,6 +129,9 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) for (i = 0; i < groups ; i++) if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) return 0; + if (groups < NFS_NGROUPS && + cred->uc_gids[groups] != NOGROUP) + return 0; return 1; } diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 91eaa26e4c42..3ad435a14ada 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -24,6 +24,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <linux/tcp.h> #include <linux/slab.h> #include <linux/sunrpc/xprt.h> +#include <linux/export.h> #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_TRANS diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 72ad836e4fe0..465df9ae1046 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1641,6 +1641,7 @@ int cache_register_net(struct cache_detail *cd, struct net *net) sunrpc_destroy_cache_detail(cd); return ret; } +EXPORT_SYMBOL_GPL(cache_register_net); int cache_register(struct cache_detail *cd) { @@ -1653,6 +1654,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net) remove_cache_proc_entries(cd, net); sunrpc_destroy_cache_detail(cd); } +EXPORT_SYMBOL_GPL(cache_unregister_net); void cache_unregister(struct cache_detail *cd) { @@ -1778,7 +1780,7 @@ const struct file_operations cache_flush_operations_pipefs = { }; int sunrpc_cache_register_pipefs(struct dentry *parent, - const char *name, mode_t umode, + const char *name, umode_t umode, struct cache_detail *cd) { struct qstr q; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c5347d29cfb7..f0268ea7e711 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -850,7 +850,9 @@ rpc_restart_call_prepare(struct rpc_task *task) { if (RPC_ASSASSINATED(task)) return 0; - task->tk_action = rpc_prepare_task; + task->tk_action = call_start; + if (task->tk_ops->rpc_call_prepare != NULL) + task->tk_action = rpc_prepare_task; return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b181e3441323..63a7a7add21e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -77,6 +77,26 @@ rpc_timeout_upcall_queue(struct work_struct *work) rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT); } +ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg, + char __user *dst, size_t buflen) +{ + char *data = (char *)msg->data + msg->copied; + size_t mlen = min(msg->len - msg->copied, buflen); + unsigned long left; + + left = copy_to_user(dst, data, mlen); + if (left == mlen) { + msg->errno = -EFAULT; + return -EFAULT; + } + + mlen -= left; + msg->copied += mlen; + msg->errno = 0; + return mlen; +} +EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall); + /** * rpc_queue_upcall - queue an upcall message to userspace * @inode: inode of upcall pipe on which to queue given message @@ -165,7 +185,6 @@ static void rpc_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } @@ -934,7 +953,7 @@ static void rpc_cachedir_depopulate(struct dentry *dentry) } struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, - mode_t umode, struct cache_detail *cd) + umode_t umode, struct cache_detail *cd) { return rpc_mkdir_populate(parent, name, umode, NULL, rpc_cachedir_populate, cd); @@ -1084,3 +1103,6 @@ void unregister_rpc_pipefs(void) kmem_cache_destroy(rpc_inode_cachep); unregister_filesystem(&rpc_pipe_fs_type); } + +/* Make 'mount -t rpc_pipefs ...' autoload this module. */ +MODULE_ALIAS("rpc_pipefs"); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index e45d2fbbe5a8..8761bf8e36fc 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -114,6 +114,9 @@ static struct rpc_program rpcb_program; static struct rpc_clnt * rpcb_local_clnt; static struct rpc_clnt * rpcb_local_clnt4; +DEFINE_SPINLOCK(rpcb_clnt_lock); +unsigned int rpcb_users; + struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -161,6 +164,56 @@ static void rpcb_map_release(void *data) kfree(map); } +static int rpcb_get_local(void) +{ + int cnt; + + spin_lock(&rpcb_clnt_lock); + if (rpcb_users) + rpcb_users++; + cnt = rpcb_users; + spin_unlock(&rpcb_clnt_lock); + + return cnt; +} + +void rpcb_put_local(void) +{ + struct rpc_clnt *clnt = rpcb_local_clnt; + struct rpc_clnt *clnt4 = rpcb_local_clnt4; + int shutdown; + + spin_lock(&rpcb_clnt_lock); + if (--rpcb_users == 0) { + rpcb_local_clnt = NULL; + rpcb_local_clnt4 = NULL; + } + shutdown = !rpcb_users; + spin_unlock(&rpcb_clnt_lock); + + if (shutdown) { + /* + * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister + */ + if (clnt4) + rpc_shutdown_client(clnt4); + if (clnt) + rpc_shutdown_client(clnt); + } +} + +static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4) +{ + /* Protected by rpcb_create_local_mutex */ + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; + smp_wmb(); + rpcb_users = 1; + dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " + "%p, rpcb_local_clnt4: %p)\n", rpcb_local_clnt, + rpcb_local_clnt4); +} + /* * Returns zero on success, otherwise a negative errno value * is returned. @@ -205,9 +258,7 @@ static int rpcb_create_local_unix(void) clnt4 = NULL; } - /* Protected by rpcb_create_local_mutex */ - rpcb_local_clnt = clnt; - rpcb_local_clnt4 = clnt4; + rpcb_set_local(clnt, clnt4); out: return result; @@ -259,9 +310,7 @@ static int rpcb_create_local_net(void) clnt4 = NULL; } - /* Protected by rpcb_create_local_mutex */ - rpcb_local_clnt = clnt; - rpcb_local_clnt4 = clnt4; + rpcb_set_local(clnt, clnt4); out: return result; @@ -271,16 +320,16 @@ out: * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local(void) +int rpcb_create_local(void) { static DEFINE_MUTEX(rpcb_create_local_mutex); int result = 0; - if (rpcb_local_clnt) + if (rpcb_get_local()) return result; mutex_lock(&rpcb_create_local_mutex); - if (rpcb_local_clnt) + if (rpcb_get_local()) goto out; if (rpcb_create_local_unix() != 0) @@ -382,11 +431,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) struct rpc_message msg = { .rpc_argp = &map, }; - int error; - - error = rpcb_create_local(); - if (error) - return error; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), @@ -410,7 +454,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, unsigned short port = ntohs(sin->sin_port); int result; - map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -437,7 +481,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, unsigned short port = ntohs(sin6->sin6_port); int result; - map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -522,11 +566,7 @@ int rpcb_v4_register(const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; - int error; - error = rpcb_create_local(); - if (error) - return error; if (rpcb_local_clnt4 == NULL) return -EPROTONOSUPPORT; @@ -686,7 +726,7 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_4: case RPCBVERS_3: map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); map->r_owner = ""; break; case RPCBVERS_2: @@ -1060,15 +1100,3 @@ static struct rpc_program rpcb_program = { .version = rpcb_version, .stats = &rpcb_stats, }; - -/** - * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister - * - */ -void cleanup_rpcb_clnt(void) -{ - if (rpcb_local_clnt4) - rpc_shutdown_client(rpcb_local_clnt4); - if (rpcb_local_clnt) - rpc_shutdown_client(rpcb_local_clnt); -} diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d12ffa545811..3341d8962786 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -18,6 +18,7 @@ #include <linux/smp.h> #include <linux/spinlock.h> #include <linux/mutex.h> +#include <linux/freezer.h> #include <linux/sunrpc/clnt.h> @@ -231,7 +232,7 @@ static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - schedule(); + freezable_schedule(); return 0; } @@ -590,6 +591,27 @@ void rpc_prepare_task(struct rpc_task *task) task->tk_ops->rpc_call_prepare(task, task->tk_calldata); } +static void +rpc_init_task_statistics(struct rpc_task *task) +{ + /* Initialize retry counters */ + task->tk_garb_retry = 2; + task->tk_cred_retry = 2; + task->tk_rebind_retry = 2; + + /* starting timestamp */ + task->tk_start = ktime_get(); +} + +static void +rpc_reset_task_statistics(struct rpc_task *task) +{ + task->tk_timeouts = 0; + task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT); + + rpc_init_task_statistics(task); +} + /* * Helper that calls task->tk_ops->rpc_call_done if it exists */ @@ -602,6 +624,7 @@ void rpc_exit_task(struct rpc_task *task) WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ xprt_release(task); + rpc_reset_task_statistics(task); } } } @@ -804,11 +827,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_calldata = task_setup_data->callback_data; INIT_LIST_HEAD(&task->tk_task); - /* Initialize retry counters */ - task->tk_garb_retry = 2; - task->tk_cred_retry = 2; - task->tk_rebind_retry = 2; - task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; task->tk_owner = current->tgid; @@ -818,8 +836,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta if (task->tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; - /* starting timestamp */ - task->tk_start = ktime_get(); + rpc_init_task_statistics(task); dprintk("RPC: new task initialized, procpid %u\n", task_pid_nr(current)); diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 10b4319ebbca..145e6784f508 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -14,6 +14,7 @@ #include <linux/pagemap.h> #include <linux/udp.h> #include <linux/sunrpc/xdr.h> +#include <linux/export.h> /** diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 9d0809160994..8ec9778c3f4a 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -61,8 +61,6 @@ static struct pernet_operations sunrpc_net_ops = { extern struct cache_detail unix_gid_cache; -extern void cleanup_rpcb_clnt(void); - static int __init init_sunrpc(void) { @@ -102,7 +100,6 @@ out: static void __exit cleanup_sunrpc(void) { - cleanup_rpcb_clnt(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6a69a1131fb7..e4aabc02368b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) fail_free: kfree(m->to_pool); + m->to_pool = NULL; fail: return -ENOMEM; } @@ -285,9 +286,10 @@ svc_pool_map_put(void) mutex_lock(&svc_pool_map_mutex); if (!--m->count) { - m->mode = SVC_POOL_DEFAULT; kfree(m->to_pool); + m->to_pool = NULL; kfree(m->pool_to); + m->pool_to = NULL; m->npools = 0; } @@ -295,6 +297,18 @@ svc_pool_map_put(void) } +static int svc_pool_map_get_node(unsigned int pidx) +{ + const struct svc_pool_map *m = &svc_pool_map; + + if (m->count) { + if (m->mode == SVC_POOL_PERCPU) + return cpu_to_node(m->pool_to[pidx]); + if (m->mode == SVC_POOL_PERNODE) + return m->pool_to[pidx]; + } + return NUMA_NO_NODE; +} /* * Set the given thread's cpus_allowed mask so that it * will only run on cpus in the given pool. @@ -354,6 +368,42 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) return &serv->sv_pools[pidx % serv->sv_nrpools]; } +static int svc_rpcb_setup(struct svc_serv *serv) +{ + int err; + + err = rpcb_create_local(); + if (err) + return err; + + /* Remove any stale portmap registrations */ + svc_unregister(serv); + return 0; +} + +void svc_rpcb_cleanup(struct svc_serv *serv) +{ + svc_unregister(serv); + rpcb_put_local(); +} +EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); + +static int svc_uses_rpcbind(struct svc_serv *serv) +{ + struct svc_program *progp; + unsigned int i; + + for (progp = serv->sv_program; progp; progp = progp->pg_next) { + for (i = 0; i < progp->pg_nvers; i++) { + if (progp->pg_vers[i] == NULL) + continue; + if (progp->pg_vers[i]->vs_hidden == 0) + return 1; + } + } + + return 0; +} /* * Create an RPC service @@ -419,8 +469,15 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, spin_lock_init(&pool->sp_lock); } - /* Remove any stale portmap registrations */ - svc_unregister(serv); + if (svc_uses_rpcbind(serv)) { + if (svc_rpcb_setup(serv) < 0) { + kfree(serv->sv_pools); + kfree(serv); + return NULL; + } + if (!serv->sv_shutdown) + serv->sv_shutdown = svc_rpcb_cleanup; + } return serv; } @@ -472,23 +529,25 @@ svc_destroy(struct svc_serv *serv) printk("svc_destroy: no threads for serv=%p!\n", serv); del_timer_sync(&serv->sv_temptimer); - - svc_close_all(&serv->sv_tempsocks); + /* + * The set of xprts (contained in the sv_tempsocks and + * sv_permsocks lists) is now constant, since it is modified + * only by accepting new sockets (done by service threads in + * svc_recv) or aging old ones (done by sv_temptimer), or + * configuration changes (excluded by whatever locking the + * caller is using--nfsd_mutex in the case of nfsd). So it's + * safe to traverse those lists and shut everything down: + */ + svc_close_all(serv); if (serv->sv_shutdown) serv->sv_shutdown(serv); - svc_close_all(&serv->sv_permsocks); - - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); - cache_clean_deferred(serv); if (svc_serv_is_pooled(serv)) svc_pool_map_put(); - svc_unregister(serv); kfree(serv->sv_pools); kfree(serv); } @@ -499,7 +558,7 @@ EXPORT_SYMBOL_GPL(svc_destroy); * We allocate pages and place them in rq_argpages. */ static int -svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) +svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) { unsigned int pages, arghi; @@ -513,7 +572,7 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) arghi = 0; BUG_ON(pages > RPCSVC_MAXPAGES); while (pages) { - struct page *p = alloc_page(GFP_KERNEL); + struct page *p = alloc_pages_node(node, GFP_KERNEL, 0); if (!p) break; rqstp->rq_pages[arghi++] = p; @@ -536,11 +595,11 @@ svc_release_buffer(struct svc_rqst *rqstp) } struct svc_rqst * -svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool) +svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; - rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); + rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node); if (!rqstp) goto out_enomem; @@ -554,15 +613,15 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool) rqstp->rq_server = serv; rqstp->rq_pool = pool; - rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); + rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_argp) goto out_thread; - rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); + rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_resp) goto out_thread; - if (!svc_init_buffer(rqstp, serv->sv_max_mesg)) + if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) goto out_thread; return rqstp; @@ -629,8 +688,8 @@ found_pool: * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between - * all pools. Must be called with a svc_get() reference and - * the BKL or another lock to protect access to svc_serv fields. + * all pools. Caller must ensure that mutual exclusion between this and + * server startup or shutdown. * * Destroying threads relies on the service threads filling in * rqstp->rq_task, which only the nfs ones do. Assumes the serv @@ -647,6 +706,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) struct svc_pool *chosen_pool; int error = 0; unsigned int state = serv->sv_nrthreads-1; + int node; if (pool == NULL) { /* The -1 assumes caller has done a svc_get() */ @@ -662,14 +722,16 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) nrservs--; chosen_pool = choose_pool(serv, pool, &state); - rqstp = svc_prepare_thread(serv, chosen_pool); + node = svc_pool_map_get_node(chosen_pool->sp_id); + rqstp = svc_prepare_thread(serv, chosen_pool, node); if (IS_ERR(rqstp)) { error = PTR_ERR(rqstp); break; } __module_get(serv->sv_module); - task = kthread_create(serv->sv_function, rqstp, serv->sv_name); + task = kthread_create_on_node(serv->sv_function, rqstp, + node, serv->sv_name); if (IS_ERR(task)) { error = PTR_ERR(task); module_put(serv->sv_module); @@ -769,7 +831,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, return error; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /* * Register an "inet6" protocol family netid with the local * rpcbind daemon via an rpcbind v4 SET request. @@ -815,7 +877,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, return error; } -#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ +#endif /* IS_ENABLED(CONFIG_IPV6) */ /* * Register a kernel RPC service via rpcbind version 4. @@ -836,11 +898,11 @@ static int __svc_register(const char *progname, error = __svc_rpcb_register4(program, version, protocol, port); break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case PF_INET6: error = __svc_rpcb_register6(program, version, protocol, port); -#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ +#endif } if (error < 0) @@ -956,9 +1018,8 @@ static void svc_unregister(const struct svc_serv *serv) /* * Printk the given error with the address of the client that caused it. */ -static int -__attribute__ ((format (printf, 2, 3))) -svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) +static __printf(2, 3) +int svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { va_list args; int r; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bd31208bbb61..74cb0d8e9ca1 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -14,6 +14,7 @@ #include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprt.h> +#include <linux/module.h> #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -21,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(unsigned long closure); +static void svc_delete_xprt(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -146,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put); * Called by transport drivers to initialize the transport independent * portion of the transport instance. */ -void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, - struct svc_serv *serv) +void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, + struct svc_xprt *xprt, struct svc_serv *serv) { memset(xprt, 0, sizeof(*xprt)); xprt->xpt_class = xcl; @@ -162,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); - xprt->xpt_net = get_net(&init_net); + xprt->xpt_net = get_net(net); } EXPORT_SYMBOL_GPL(svc_xprt_init); @@ -178,13 +180,13 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, .sin6_port = htons(port), }; -#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ +#endif struct sockaddr *sap; size_t len; @@ -193,12 +195,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, sap = (struct sockaddr *)&sin; len = sizeof(sin); break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case PF_INET6: sap = (struct sockaddr *)&sin6; len = sizeof(sin6); break; -#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ +#endif default: return ERR_PTR(-EAFNOSUPPORT); } @@ -254,8 +256,6 @@ EXPORT_SYMBOL_GPL(svc_create_xprt); */ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt) { - struct sockaddr *sin; - memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen); rqstp->rq_addrlen = xprt->xpt_remotelen; @@ -263,15 +263,8 @@ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt) * Destination address in request is needed for binding the * source address in RPC replies/callbacks later. */ - sin = (struct sockaddr *)&xprt->xpt_local; - switch (sin->sa_family) { - case AF_INET: - rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; - break; - case AF_INET6: - rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; - break; - } + memcpy(&rqstp->rq_daddr, &xprt->xpt_local, xprt->xpt_locallen); + rqstp->rq_daddrlen = xprt->xpt_locallen; } EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs); @@ -886,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt) /* * Remove a dead transport */ -void svc_delete_xprt(struct svc_xprt *xprt) +static void svc_delete_xprt(struct svc_xprt *xprt) { struct svc_serv *serv = xprt->xpt_server; struct svc_deferred_req *dr; @@ -901,14 +894,7 @@ void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) list_del_init(&xprt->xpt_list); - /* - * The only time we're called while xpt_ready is still on a list - * is while the list itself is about to be destroyed (in - * svc_destroy). BUT svc_xprt_enqueue could still be attempting - * to add new entries to the sp_sockets list, so we can't leave - * a freed xprt on it. - */ - list_del_init(&xprt->xpt_ready); + BUG_ON(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); @@ -936,22 +922,48 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -void svc_close_all(struct list_head *xprt_list) +static void svc_close_list(struct list_head *xprt_list) +{ + struct svc_xprt *xprt; + + list_for_each_entry(xprt, xprt_list, xpt_list) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); + set_bit(XPT_BUSY, &xprt->xpt_flags); + } +} + +void svc_close_all(struct svc_serv *serv) { + struct svc_pool *pool; struct svc_xprt *xprt; struct svc_xprt *tmp; + int i; + + svc_close_list(&serv->sv_tempsocks); + svc_close_list(&serv->sv_permsocks); + for (i = 0; i < serv->sv_nrpools; i++) { + pool = &serv->sv_pools[i]; + + spin_lock_bh(&pool->sp_lock); + while (!list_empty(&pool->sp_sockets)) { + xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); + list_del_init(&xprt->xpt_ready); + } + spin_unlock_bh(&pool->sp_lock); + } /* - * The server is shutting down, and no more threads are running. - * svc_xprt_enqueue() might still be running, but at worst it - * will re-add the xprt to sp_sockets, which will soon get - * freed. So we don't bother with any more locking, and don't - * leave the close to the (nonexistent) server threads: + * At this point the sp_sockets lists will stay empty, since + * svc_enqueue will not add new entries without taking the + * sp_lock and checking XPT_BUSY. */ - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { - set_bit(XPT_CLOSE, &xprt->xpt_flags); + list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list) svc_delete_xprt(xprt); - } + list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list) + svc_delete_xprt(xprt); + + BUG_ON(!list_empty(&serv->sv_permsocks)); + BUG_ON(!list_empty(&serv->sv_tempsocks)); } /* diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index ce136323da8b..01153ead1dba 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -134,7 +134,7 @@ static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) struct ip_map *item = container_of(citem, struct ip_map, h); strcpy(new->m_class, item->m_class); - ipv6_addr_copy(&new->m_addr, &item->m_addr); + new->m_addr = item->m_addr; } static void update(struct cache_head *cnew, struct cache_head *citem) { @@ -220,7 +220,7 @@ static int ip_map_parse(struct cache_detail *cd, ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr, &sin6.sin6_addr); break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: memcpy(&sin6, &address.s6, sizeof(sin6)); break; @@ -274,7 +274,7 @@ static int ip_map_show(struct seq_file *m, } im = container_of(h, struct ip_map, h); /* class addr domain */ - ipv6_addr_copy(&addr, &im->m_addr); + addr = im->m_addr; if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) @@ -297,7 +297,7 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct cache_head *ch; strcpy(ip.m_class, class); - ipv6_addr_copy(&ip.m_addr, addr); + ip.m_addr = *addr; ch = sunrpc_cache_lookup(cd, &ip.h, hash_str(class, IP_HASHBITS) ^ hash_ip6(*addr)); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 767d494de7a2..464570906f80 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -21,6 +21,7 @@ #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/module.h> #include <linux/errno.h> #include <linux/fcntl.h> #include <linux/net.h> @@ -143,19 +144,20 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) cmh->cmsg_level = SOL_IP; cmh->cmsg_type = IP_PKTINFO; pki->ipi_ifindex = 0; - pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr; + pki->ipi_spec_dst.s_addr = + svc_daddr_in(rqstp)->sin_addr.s_addr; cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); } break; case AF_INET6: { struct in6_pktinfo *pki = CMSG_DATA(cmh); + struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp); cmh->cmsg_level = SOL_IPV6; cmh->cmsg_type = IPV6_PKTINFO; - pki->ipi6_ifindex = 0; - ipv6_addr_copy(&pki->ipi6_addr, - &rqstp->rq_daddr.addr6); + pki->ipi6_ifindex = daddr->sin6_scope_id; + pki->ipi6_addr = daddr->sin6_addr; cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); } break; @@ -498,9 +500,13 @@ static int svc_udp_get_dest_address4(struct svc_rqst *rqstp, struct cmsghdr *cmh) { struct in_pktinfo *pki = CMSG_DATA(cmh); + struct sockaddr_in *daddr = svc_daddr_in(rqstp); + if (cmh->cmsg_type != IP_PKTINFO) return 0; - rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; + + daddr->sin_family = AF_INET; + daddr->sin_addr.s_addr = pki->ipi_spec_dst.s_addr; return 1; } @@ -511,9 +517,14 @@ static int svc_udp_get_dest_address6(struct svc_rqst *rqstp, struct cmsghdr *cmh) { struct in6_pktinfo *pki = CMSG_DATA(cmh); + struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp); + if (cmh->cmsg_type != IPV6_PKTINFO) return 0; - ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); + + daddr->sin6_family = AF_INET6; + daddr->sin6_addr = pki->ipi6_addr; + daddr->sin6_scope_id = pki->ipi6_ifindex; return 1; } @@ -614,6 +625,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) skb_free_datagram_locked(svsk->sk_sk, skb); return 0; } + rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp)); if (skb_is_nonlinear(skb)) { /* we have to copy */ @@ -727,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) { int err, level, optname, one = 1; - svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); + svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, + &svsk->sk_xprt, serv); clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); svsk->sk_sk->sk_data_ready = svc_udp_data_ready; svsk->sk_sk->sk_write_space = svc_write_space; @@ -1331,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) { struct sock *sk = svsk->sk_sk; - svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); + svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, + &svsk->sk_xprt, serv); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); @@ -1647,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, return ERR_PTR(-ENOMEM); xprt = &svsk->sk_xprt; - svc_xprt_init(&svc_tcp_bc_class, xprt, serv); + svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); serv->sv_bc_xprt = xprt; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 277ebd4bf095..593f4c605305 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -296,7 +296,7 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) * Copies data into an arbitrary memory location from an array of pages * The copy is assumed to be non-overlapping. */ -static void +void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) { struct page **pgfrom; @@ -324,6 +324,7 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) } while ((len -= copy) != 0); } +EXPORT_SYMBOL_GPL(_copy_from_pages); /* * xdr_shrink_bufhead diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index f4385e45a5fc..c64c0ef519b5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -995,13 +995,11 @@ out_init_req: static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { - if (xprt_dynamic_free_slot(xprt, req)) - return; - - memset(req, 0, sizeof(*req)); /* mark unused */ - spin_lock(&xprt->reserve_lock); - list_add(&req->rq_list, &xprt->free); + if (!xprt_dynamic_free_slot(xprt, req)) { + memset(req, 0, sizeof(*req)); /* mark unused */ + list_add(&req->rq_list, &xprt->free); + } rpc_wake_up_next(&xprt->backlog); spin_unlock(&xprt->reserve_lock); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index a385430c722a..894cb42db91d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -50,6 +50,7 @@ #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> #include <linux/sunrpc/svc_rdma.h> +#include <linux/export.h> #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -452,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, if (!cma_xprt) return NULL; - svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv); + svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d7f97ef26590..55472c48825e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -496,7 +496,7 @@ static int xs_nospace(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - int ret = 0; + int ret = -EAGAIN; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", task->tk_pid, req->rq_slen - req->rq_bytes_sent, @@ -508,7 +508,6 @@ static int xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { - ret = -EAGAIN; /* * Notify TCP that we're limited by the application * window size @@ -2530,8 +2529,10 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, int err; err = xs_init_anyaddr(args->dstaddr->sa_family, (struct sockaddr *)&new->srcaddr); - if (err != 0) + if (err != 0) { + xprt_free(xprt); return ERR_PTR(err); + } } return xprt; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index ca84212cfbfe..e75813904f26 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -12,6 +12,7 @@ */ #include <linux/mm.h> +#include <linux/export.h> #include <linux/sysctl.h> #include <linux/nsproxy.h> diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 759b318b5ffb..8eb87b11d100 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -39,13 +39,14 @@ #include "link.h" #include "port.h" #include "bcast.h" +#include "name_distr.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ /** - * struct bcbearer_pair - a pair of bearers used by broadcast link + * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link * @primary: pointer to primary bearer * @secondary: pointer to secondary bearer * @@ -53,13 +54,13 @@ * to be paired. */ -struct bcbearer_pair { +struct tipc_bcbearer_pair { struct tipc_bearer *primary; struct tipc_bearer *secondary; }; /** - * struct bcbearer - bearer used by broadcast link + * struct tipc_bcbearer - bearer used by broadcast link * @bearer: (non-standard) broadcast bearer structure * @media: (non-standard) broadcast media structure * @bpairs: array of bearer pairs @@ -73,38 +74,40 @@ struct bcbearer_pair { * prevented through use of the spinlock "bc_lock". */ -struct bcbearer { +struct tipc_bcbearer { struct tipc_bearer bearer; - struct media media; - struct bcbearer_pair bpairs[MAX_BEARERS]; - struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; + struct tipc_media media; + struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; + struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; struct tipc_node_map remains; struct tipc_node_map remains_new; }; /** - * struct bclink - link used for broadcast messages + * struct tipc_bclink - link used for broadcast messages * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node + * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * * Handles sequence numbering, fragmentation, bundling, etc. */ -struct bclink { - struct link link; +struct tipc_bclink { + struct tipc_link link; struct tipc_node node; + struct tipc_node_map bcast_nodes; struct tipc_node *retransmit_to; }; +static struct tipc_bcbearer bcast_bearer; +static struct tipc_bclink bcast_link; -static struct bcbearer *bcbearer; -static struct bclink *bclink; -static struct link *bcl; -static DEFINE_SPINLOCK(bc_lock); +static struct tipc_bcbearer *bcbearer = &bcast_bearer; +static struct tipc_bclink *bclink = &bcast_link; +static struct tipc_link *bcl = &bcast_link.link; -/* broadcast-capable node map */ -struct tipc_node_map tipc_bcast_nmap; +static DEFINE_SPINLOCK(bc_lock); const char tipc_bclink_name[] = "broadcast-link"; @@ -112,11 +115,6 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, struct tipc_node_map *nm_diff); -static u32 buf_seqno(struct sk_buff *buf) -{ - return msg_seqno(buf_msg(buf)); -} - static u32 bcbuf_acks(struct sk_buff *buf) { return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; @@ -132,6 +130,19 @@ static void bcbuf_decr_acks(struct sk_buff *buf) bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); } +void tipc_bclink_add_node(u32 addr) +{ + spin_lock_bh(&bc_lock); + tipc_nmap_add(&bclink->bcast_nodes, addr); + spin_unlock_bh(&bc_lock); +} + +void tipc_bclink_remove_node(u32 addr) +{ + spin_lock_bh(&bc_lock); + tipc_nmap_remove(&bclink->bcast_nodes, addr); + spin_unlock_bh(&bc_lock); +} static void bclink_set_last_sent(void) { @@ -221,14 +232,36 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct sk_buff *next; unsigned int released = 0; - if (less_eq(acked, n_ptr->bclink.acked)) - return; - spin_lock_bh(&bc_lock); - /* Skip over packets that node has previously acknowledged */ - + /* Bail out if tx queue is empty (no clean up is required) */ crs = bcl->first_out; + if (!crs) + goto exit; + + /* Determine which messages need to be acknowledged */ + if (acked == INVALID_LINK_SEQ) { + /* + * Contact with specified node has been lost, so need to + * acknowledge sent messages only (if other nodes still exist) + * or both sent and unsent messages (otherwise) + */ + if (bclink->bcast_nodes.count) + acked = bcl->fsm_msg_cnt; + else + acked = bcl->next_out_no; + } else { + /* + * Bail out if specified sequence number does not correspond + * to a message that has been sent and not yet acknowledged + */ + if (less(acked, buf_seqno(crs)) || + less(bcl->fsm_msg_cnt, acked) || + less_eq(acked, n_ptr->bclink.acked)) + goto exit; + } + + /* Skip over packets that node has previously acknowledged */ while (crs && less_eq(buf_seqno(crs), n_ptr->bclink.acked)) crs = crs->next; @@ -236,7 +269,15 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) while (crs && less_eq(buf_seqno(crs), acked)) { next = crs->next; - bcbuf_decr_acks(crs); + + if (crs != bcl->next_out) + bcbuf_decr_acks(crs); + else { + bcbuf_set_acks(crs, 0); + bcl->next_out = next; + bclink_set_last_sent(); + } + if (bcbuf_acks(crs) == 0) { bcl->first_out = next; bcl->out_queue_size--; @@ -255,6 +296,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) } if (unlikely(released && !list_empty(&bcl->waiting_ports))) tipc_link_wakeup_ports(bcl, 0); +exit: spin_unlock_bh(&bc_lock); } @@ -266,7 +308,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) static void bclink_send_ack(struct tipc_node *n_ptr) { - struct link *l_ptr = n_ptr->active_links[n_ptr->addr & 1]; + struct tipc_link *l_ptr = n_ptr->active_links[n_ptr->addr & 1]; if (l_ptr != NULL) tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); @@ -298,14 +340,9 @@ static void bclink_send_nack(struct tipc_node *n_ptr) msg_set_bcgap_to(msg, n_ptr->bclink.gap_to); msg_set_bcast_tag(msg, tipc_own_tag); - if (tipc_bearer_send(&bcbearer->bearer, buf, NULL)) { - bcl->stats.sent_nacks++; - buf_discard(buf); - } else { - tipc_bearer_schedule(bcl->b_ptr, bcl); - bcl->proto_msg_queue = buf; - bcl->stats.bearer_congs++; - } + tipc_bearer_send(&bcbearer->bearer, buf, NULL); + bcl->stats.sent_nacks++; + buf_discard(buf); /* * Ensure we doesn't send another NACK msg to the node @@ -406,13 +443,19 @@ int tipc_bclink_send_msg(struct sk_buff *buf) spin_lock_bh(&bc_lock); + if (!bclink->bcast_nodes.count) { + res = msg_data_sz(buf_msg(buf)); + buf_discard(buf); + goto exit; + } + res = tipc_link_send_buf(bcl, buf); - if (likely(res > 0)) + if (likely(res >= 0)) { bclink_set_last_sent(); - - bcl->stats.queue_sz_counts++; - bcl->stats.accu_queue_sz += bcl->out_queue_size; - + bcl->stats.queue_sz_counts++; + bcl->stats.accu_queue_sz += bcl->out_queue_size; + } +exit: spin_unlock_bh(&bc_lock); return res; } @@ -426,20 +469,28 @@ int tipc_bclink_send_msg(struct sk_buff *buf) void tipc_bclink_recv_pkt(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - struct tipc_node *node = tipc_node_find(msg_prevnode(msg)); + struct tipc_node *node; u32 next_in; u32 seqno; struct sk_buff *deferred; - if (unlikely(!node || !tipc_node_is_up(node) || !node->bclink.supported || - (msg_mc_netid(msg) != tipc_net_id))) { - buf_discard(buf); - return; - } + /* Screen out unwanted broadcast messages */ + + if (msg_mc_netid(msg) != tipc_net_id) + goto exit; + + node = tipc_node_find(msg_prevnode(msg)); + if (unlikely(!node)) + goto exit; + + tipc_node_lock(node); + if (unlikely(!node->bclink.supported)) + goto unlock; if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { + if (msg_type(msg) != STATE_MSG) + goto unlock; if (msg_destnode(msg) == tipc_own_addr) { - tipc_node_lock(node); tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); spin_lock_bh(&bc_lock); @@ -449,18 +500,18 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) msg_bcgap_to(msg)); spin_unlock_bh(&bc_lock); } else { + tipc_node_unlock(node); tipc_bclink_peek_nack(msg_destnode(msg), msg_bcast_tag(msg), msg_bcgap_after(msg), msg_bcgap_to(msg)); } - buf_discard(buf); - return; + goto exit; } - tipc_node_lock(node); + /* Handle in-sequence broadcast message */ + receive: - deferred = node->bclink.deferred_head; next_in = mod(node->bclink.last_in + 1); seqno = msg_seqno(msg); @@ -474,7 +525,10 @@ receive: } if (likely(msg_isdata(msg))) { tipc_node_unlock(node); - tipc_port_recv_mcast(buf, NULL); + if (likely(msg_mcast(msg))) + tipc_port_recv_mcast(buf, NULL); + else + buf_discard(buf); } else if (msg_user(msg) == MSG_BUNDLER) { bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); @@ -487,18 +541,22 @@ receive: bcl->stats.recv_fragmented++; tipc_node_unlock(node); tipc_net_route_msg(buf); + } else if (msg_user(msg) == NAME_DISTRIBUTOR) { + tipc_node_unlock(node); + tipc_named_recv(buf); } else { tipc_node_unlock(node); - tipc_net_route_msg(buf); + buf_discard(buf); } + buf = NULL; + tipc_node_lock(node); + deferred = node->bclink.deferred_head; if (deferred && (buf_seqno(deferred) == mod(next_in + 1))) { - tipc_node_lock(node); buf = deferred; msg = buf_msg(buf); node->bclink.deferred_head = deferred->next; goto receive; } - return; } else if (less(next_in, seqno)) { u32 gap_after = node->bclink.gap_after; u32 gap_to = node->bclink.gap_to; @@ -513,6 +571,7 @@ receive: else if (less(gap_after, seqno) && less(seqno, gap_to)) node->bclink.gap_to = seqno; } + buf = NULL; if (bclink_ack_allowed(node->bclink.nack_sync)) { if (gap_to != gap_after) bclink_send_nack(node); @@ -520,9 +579,11 @@ receive: } } else { bcl->stats.duplicates++; - buf_discard(buf); } +unlock: tipc_node_unlock(node); +exit: + buf_discard(buf); } u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) @@ -535,10 +596,11 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) /** * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer * - * Send through as many bearers as necessary to reach all nodes - * that support TIPC multicasting. + * Send packet over as many bearers as necessary to reach all nodes + * that have joined the broadcast link. * - * Returns 0 if packet sent successfully, non-zero if not + * Returns 0 (packet sent successfully) under all circumstances, + * since the broadcast link's pseudo-bearer never blocks */ static int tipc_bcbearer_send(struct sk_buff *buf, @@ -547,18 +609,23 @@ static int tipc_bcbearer_send(struct sk_buff *buf, { int bp_index; - /* Prepare buffer for broadcasting (if first time trying to send it) */ + /* + * Prepare broadcast link message for reliable transmission, + * if first time trying to send it; + * preparation is skipped for broadcast link protocol messages + * since they are sent in an unreliable manner and don't need it + */ if (likely(!msg_non_seq(buf_msg(buf)))) { struct tipc_msg *msg; - bcbuf_set_acks(buf, tipc_bcast_nmap.count); + bcbuf_set_acks(buf, bclink->bcast_nodes.count); msg = buf_msg(buf); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); bcl->stats.sent_info++; - if (WARN_ON(!tipc_bcast_nmap.count)) { + if (WARN_ON(!bclink->bcast_nodes.count)) { dump_stack(); return 0; } @@ -566,7 +633,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, /* Send buffer over bearers until all targets reached */ - bcbearer->remains = tipc_bcast_nmap; + bcbearer->remains = bclink->bcast_nodes; for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; @@ -596,18 +663,12 @@ static int tipc_bcbearer_send(struct sk_buff *buf, } if (bcbearer->remains_new.count == 0) - return 0; + break; /* all targets reached */ bcbearer->remains = bcbearer->remains_new; } - /* - * Unable to reach all targets (indicate success, since currently - * there isn't code in place to properly block & unblock the - * pseudo-bearer used by the broadcast link) - */ - - return TIPC_OK; + return 0; } /** @@ -616,8 +677,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, void tipc_bcbearer_sort(void) { - struct bcbearer_pair *bp_temp = bcbearer->bpairs_temp; - struct bcbearer_pair *bp_curr; + struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; + struct tipc_bcbearer_pair *bp_curr; int b_index; int pri; @@ -667,27 +728,6 @@ void tipc_bcbearer_sort(void) spin_unlock_bh(&bc_lock); } -/** - * tipc_bcbearer_push - resolve bearer congestion - * - * Forces bclink to push out any unsent packets, until all packets are gone - * or congestion reoccurs. - * No locks set when function called - */ - -void tipc_bcbearer_push(void) -{ - struct tipc_bearer *b_ptr; - - spin_lock_bh(&bc_lock); - b_ptr = &bcbearer->bearer; - if (b_ptr->blocked) { - b_ptr->blocked = 0; - tipc_bearer_lock_push(b_ptr); - } - spin_unlock_bh(&bc_lock); -} - int tipc_bclink_stats(char *buf, const u32 buf_size) { @@ -759,25 +799,13 @@ int tipc_bclink_set_queue_limits(u32 limit) return 0; } -int tipc_bclink_init(void) +void tipc_bclink_init(void) { - bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); - bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); - if (!bcbearer || !bclink) { - warn("Multicast link creation failed, no memory\n"); - kfree(bcbearer); - bcbearer = NULL; - kfree(bclink); - bclink = NULL; - return -ENOMEM; - } - INIT_LIST_HEAD(&bcbearer->bearer.cong_links); bcbearer->bearer.media = &bcbearer->media; bcbearer->media.send_msg = tipc_bcbearer_send; - sprintf(bcbearer->media.name, "tipc-multicast"); + sprintf(bcbearer->media.name, "tipc-broadcast"); - bcl = &bclink->link; INIT_LIST_HEAD(&bcl->waiting_ports); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); @@ -787,22 +815,16 @@ int tipc_bclink_init(void) bcl->b_ptr = &bcbearer->bearer; bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); - - return 0; } void tipc_bclink_stop(void) { spin_lock_bh(&bc_lock); - if (bcbearer) { - tipc_link_stop(bcl); - bcl = NULL; - kfree(bclink); - bclink = NULL; - kfree(bcbearer); - bcbearer = NULL; - } + tipc_link_stop(bcl); spin_unlock_bh(&bc_lock); + + memset(bclink, 0, sizeof(*bclink)); + memset(bcbearer, 0, sizeof(*bcbearer)); } @@ -871,9 +893,9 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, * tipc_port_list_add - add a port to a port list, ensuring no duplicates */ -void tipc_port_list_add(struct port_list *pl_ptr, u32 port) +void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port) { - struct port_list *item = pl_ptr; + struct tipc_port_list *item = pl_ptr; int i; int item_sz = PLSIZE; int cnt = pl_ptr->count; @@ -905,10 +927,10 @@ void tipc_port_list_add(struct port_list *pl_ptr, u32 port) * */ -void tipc_port_list_free(struct port_list *pl_ptr) +void tipc_port_list_free(struct tipc_port_list *pl_ptr) { - struct port_list *item; - struct port_list *next; + struct tipc_port_list *item; + struct tipc_port_list *next; for (item = pl_ptr->next; item; item = next) { next = item->next; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 500c97f1c859..b009666c60b0 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -51,20 +51,18 @@ struct tipc_node_map { u32 map[MAX_NODES / WSIZE]; }; -extern struct tipc_node_map tipc_bcast_nmap; - #define PLSIZE 32 /** - * struct port_list - set of node local destination ports + * struct tipc_port_list - set of node local destination ports * @count: # of ports in set (only valid for first entry in list) * @next: pointer to next entry in list * @ports: array of port references */ -struct port_list { +struct tipc_port_list { int count; - struct port_list *next; + struct tipc_port_list *next; u32 ports[PLSIZE]; }; @@ -85,11 +83,13 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } -void tipc_port_list_add(struct port_list *pl_ptr, u32 port); -void tipc_port_list_free(struct port_list *pl_ptr); +void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port); +void tipc_port_list_free(struct tipc_port_list *pl_ptr); -int tipc_bclink_init(void); +void tipc_bclink_init(void); void tipc_bclink_stop(void); +void tipc_bclink_add_node(u32 addr); +void tipc_bclink_remove_node(u32 addr); struct tipc_node *tipc_bclink_retransmit_to(void); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); int tipc_bclink_send_msg(struct sk_buff *buf); @@ -101,6 +101,5 @@ int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); void tipc_bcbearer_sort(void); -void tipc_bcbearer_push(void); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 85eba9c08ee9..329fb659fae4 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -41,7 +41,7 @@ #define MAX_ADDR_STR 32 -static struct media media_list[MAX_MEDIA]; +static struct tipc_media *media_list[MAX_MEDIA]; static u32 media_count; struct tipc_bearer tipc_bearers[MAX_BEARERS]; @@ -65,17 +65,31 @@ static int media_name_valid(const char *name) } /** - * media_find - locates specified media object by name + * tipc_media_find - locates specified media object by name */ -static struct media *media_find(const char *name) +struct tipc_media *tipc_media_find(const char *name) { - struct media *m_ptr; u32 i; - for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) { - if (!strcmp(m_ptr->name, name)) - return m_ptr; + for (i = 0; i < media_count; i++) { + if (!strcmp(media_list[i]->name, name)) + return media_list[i]; + } + return NULL; +} + +/** + * media_find_id - locates specified media object by type identifier + */ + +static struct tipc_media *media_find_id(u8 type) +{ + u32 i; + + for (i = 0; i < media_count; i++) { + if (media_list[i]->type_id == type) + return media_list[i]; } return NULL; } @@ -86,87 +100,34 @@ static struct media *media_find(const char *name) * Bearers for this media type must be activated separately at a later stage. */ -int tipc_register_media(u32 media_type, - char *name, - int (*enable)(struct tipc_bearer *), - void (*disable)(struct tipc_bearer *), - int (*send_msg)(struct sk_buff *, - struct tipc_bearer *, - struct tipc_media_addr *), - char *(*addr2str)(struct tipc_media_addr *a, - char *str_buf, int str_size), - struct tipc_media_addr *bcast_addr, - const u32 bearer_priority, - const u32 link_tolerance, /* [ms] */ - const u32 send_window_limit) +int tipc_register_media(struct tipc_media *m_ptr) { - struct media *m_ptr; - u32 media_id; - u32 i; int res = -EINVAL; write_lock_bh(&tipc_net_lock); - if (tipc_mode != TIPC_NET_MODE) { - warn("Media <%s> rejected, not in networked mode yet\n", name); + if (!media_name_valid(m_ptr->name)) goto exit; - } - if (!media_name_valid(name)) { - warn("Media <%s> rejected, illegal name\n", name); + if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) || + !m_ptr->bcast_addr.broadcast) goto exit; - } - if (!bcast_addr) { - warn("Media <%s> rejected, no broadcast address\n", name); + if (m_ptr->priority > TIPC_MAX_LINK_PRI) goto exit; - } - if ((bearer_priority < TIPC_MIN_LINK_PRI) || - (bearer_priority > TIPC_MAX_LINK_PRI)) { - warn("Media <%s> rejected, illegal priority (%u)\n", name, - bearer_priority); + if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) || + (m_ptr->tolerance > TIPC_MAX_LINK_TOL)) goto exit; - } - if ((link_tolerance < TIPC_MIN_LINK_TOL) || - (link_tolerance > TIPC_MAX_LINK_TOL)) { - warn("Media <%s> rejected, illegal tolerance (%u)\n", name, - link_tolerance); + if (media_count >= MAX_MEDIA) goto exit; - } - - media_id = media_count++; - if (media_id >= MAX_MEDIA) { - warn("Media <%s> rejected, media limit reached (%u)\n", name, - MAX_MEDIA); - media_count--; + if (tipc_media_find(m_ptr->name) || media_find_id(m_ptr->type_id)) goto exit; - } - for (i = 0; i < media_id; i++) { - if (media_list[i].type_id == media_type) { - warn("Media <%s> rejected, duplicate type (%u)\n", name, - media_type); - media_count--; - goto exit; - } - if (!strcmp(name, media_list[i].name)) { - warn("Media <%s> rejected, duplicate name\n", name); - media_count--; - goto exit; - } - } - m_ptr = &media_list[media_id]; - m_ptr->type_id = media_type; - m_ptr->send_msg = send_msg; - m_ptr->enable_bearer = enable; - m_ptr->disable_bearer = disable; - m_ptr->addr2str = addr2str; - memcpy(&m_ptr->bcast_addr, bcast_addr, sizeof(*bcast_addr)); - strcpy(m_ptr->name, name); - m_ptr->priority = bearer_priority; - m_ptr->tolerance = link_tolerance; - m_ptr->window = send_window_limit; + media_list[media_count] = m_ptr; + media_count++; res = 0; exit: write_unlock_bh(&tipc_net_lock); + if (res) + warn("Media <%s> registration error\n", m_ptr->name); return res; } @@ -176,27 +137,19 @@ exit: void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a) { - struct media *m_ptr; - u32 media_type; - u32 i; - - media_type = ntohl(a->type); - for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) { - if (m_ptr->type_id == media_type) - break; - } + char addr_str[MAX_ADDR_STR]; + struct tipc_media *m_ptr; - if ((i < media_count) && (m_ptr->addr2str != NULL)) { - char addr_str[MAX_ADDR_STR]; + m_ptr = media_find_id(a->media_id); - tipc_printf(pb, "%s(%s)", m_ptr->name, - m_ptr->addr2str(a, addr_str, sizeof(addr_str))); - } else { - unchar *addr = (unchar *)&a->dev_addr; + if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) + tipc_printf(pb, "%s(%s)", m_ptr->name, addr_str); + else { + u32 i; - tipc_printf(pb, "UNKNOWN(%u)", media_type); - for (i = 0; i < (sizeof(*a) - sizeof(a->type)); i++) - tipc_printf(pb, "-%02x", addr[i]); + tipc_printf(pb, "UNKNOWN(%u)", a->media_id); + for (i = 0; i < sizeof(a->value); i++) + tipc_printf(pb, "-%02x", a->value[i]); } } @@ -207,7 +160,6 @@ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a) struct sk_buff *tipc_media_get_names(void) { struct sk_buff *buf; - struct media *m_ptr; int i; buf = tipc_cfg_reply_alloc(MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME)); @@ -215,9 +167,10 @@ struct sk_buff *tipc_media_get_names(void) return NULL; read_lock_bh(&tipc_net_lock); - for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) { - tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, m_ptr->name, - strlen(m_ptr->name) + 1); + for (i = 0; i < media_count; i++) { + tipc_cfg_append_tlv(buf, TIPC_TLV_MEDIA_NAME, + media_list[i]->name, + strlen(media_list[i]->name) + 1); } read_unlock_bh(&tipc_net_lock); return buf; @@ -232,7 +185,7 @@ struct sk_buff *tipc_media_get_names(void) */ static int bearer_name_validate(const char *name, - struct bearer_name *name_parts) + struct tipc_bearer_names *name_parts) { char name_copy[TIPC_MAX_BEARER_NAME]; char *media_name; @@ -276,10 +229,10 @@ static int bearer_name_validate(const char *name, } /** - * bearer_find - locates bearer object with matching bearer name + * tipc_bearer_find - locates bearer object with matching bearer name */ -static struct tipc_bearer *bearer_find(const char *name) +struct tipc_bearer *tipc_bearer_find(const char *name) { struct tipc_bearer *b_ptr; u32 i; @@ -318,7 +271,6 @@ struct tipc_bearer *tipc_bearer_find_interface(const char *if_name) struct sk_buff *tipc_bearer_get_names(void) { struct sk_buff *buf; - struct media *m_ptr; struct tipc_bearer *b_ptr; int i, j; @@ -327,10 +279,10 @@ struct sk_buff *tipc_bearer_get_names(void) return NULL; read_lock_bh(&tipc_net_lock); - for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) { + for (i = 0; i < media_count; i++) { for (j = 0; j < MAX_BEARERS; j++) { b_ptr = &tipc_bearers[j]; - if (b_ptr->active && (b_ptr->media == m_ptr)) { + if (b_ptr->active && (b_ptr->media == media_list[i])) { tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, b_ptr->name, strlen(b_ptr->name) + 1); @@ -366,7 +318,7 @@ void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) static int bearer_push(struct tipc_bearer *b_ptr) { u32 res = 0; - struct link *ln, *tln; + struct tipc_link *ln, *tln; if (b_ptr->blocked) return 0; @@ -385,13 +337,9 @@ static int bearer_push(struct tipc_bearer *b_ptr) void tipc_bearer_lock_push(struct tipc_bearer *b_ptr) { - int res; - spin_lock_bh(&b_ptr->lock); - res = bearer_push(b_ptr); + bearer_push(b_ptr); spin_unlock_bh(&b_ptr->lock); - if (res) - tipc_bcbearer_push(); } @@ -416,7 +364,8 @@ void tipc_continue(struct tipc_bearer *b_ptr) * bearer.lock is busy */ -static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, struct link *l_ptr) +static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, + struct tipc_link *l_ptr) { list_move_tail(&l_ptr->link_list, &b_ptr->cong_links); } @@ -429,7 +378,7 @@ static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, struct link * bearer.lock is free */ -void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr) +void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr) { spin_lock_bh(&b_ptr->lock); tipc_bearer_schedule_unlocked(b_ptr, l_ptr); @@ -442,7 +391,8 @@ void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr) * and if there is, try to resolve it before returning. * 'tipc_net_lock' is read_locked when this function is called */ -int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr) +int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, + struct tipc_link *l_ptr) { int res = 1; @@ -461,7 +411,7 @@ int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr * tipc_bearer_congested - determines if bearer is currently congested */ -int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr) +int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr) { if (unlikely(b_ptr->blocked)) return 1; @@ -477,8 +427,8 @@ int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr) int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) { struct tipc_bearer *b_ptr; - struct media *m_ptr; - struct bearer_name b_name; + struct tipc_media *m_ptr; + struct tipc_bearer_names b_names; char addr_string[16]; u32 bearer_id; u32 with_this_prio; @@ -490,7 +440,7 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) name); return -ENOPROTOOPT; } - if (!bearer_name_validate(name, &b_name)) { + if (!bearer_name_validate(name, &b_names)) { warn("Bearer <%s> rejected, illegal name\n", name); return -EINVAL; } @@ -515,10 +465,10 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) write_lock_bh(&tipc_net_lock); - m_ptr = media_find(b_name.media_name); + m_ptr = tipc_media_find(b_names.media_name); if (!m_ptr) { warn("Bearer <%s> rejected, media <%s> not registered\n", name, - b_name.media_name); + b_names.media_name); goto exit; } @@ -565,6 +515,8 @@ restart: b_ptr->identity = bearer_id; b_ptr->media = m_ptr; + b_ptr->tolerance = m_ptr->tolerance; + b_ptr->window = m_ptr->window; b_ptr->net_plane = bearer_id + 'A'; b_ptr->active = 1; b_ptr->priority = priority; @@ -594,11 +546,11 @@ exit: int tipc_block_bearer(const char *name) { struct tipc_bearer *b_ptr = NULL; - struct link *l_ptr; - struct link *temp_l_ptr; + struct tipc_link *l_ptr; + struct tipc_link *temp_l_ptr; read_lock_bh(&tipc_net_lock); - b_ptr = bearer_find(name); + b_ptr = tipc_bearer_find(name); if (!b_ptr) { warn("Attempt to block unknown bearer <%s>\n", name); read_unlock_bh(&tipc_net_lock); @@ -608,6 +560,7 @@ int tipc_block_bearer(const char *name) info("Blocking bearer <%s>\n", name); spin_lock_bh(&b_ptr->lock); b_ptr->blocked = 1; + list_splice_init(&b_ptr->cong_links, &b_ptr->links); list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { struct tipc_node *n_ptr = l_ptr->owner; @@ -628,13 +581,14 @@ int tipc_block_bearer(const char *name) static void bearer_disable(struct tipc_bearer *b_ptr) { - struct link *l_ptr; - struct link *temp_l_ptr; + struct tipc_link *l_ptr; + struct tipc_link *temp_l_ptr; info("Disabling bearer <%s>\n", b_ptr->name); spin_lock_bh(&b_ptr->lock); b_ptr->blocked = 1; b_ptr->media->disable_bearer(b_ptr); + list_splice_init(&b_ptr->cong_links, &b_ptr->links); list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { tipc_link_delete(l_ptr); } @@ -650,7 +604,7 @@ int tipc_disable_bearer(const char *name) int res; write_lock_bh(&tipc_net_lock); - b_ptr = bearer_find(name); + b_ptr = tipc_bearer_find(name); if (b_ptr == NULL) { warn("Attempt to disable unknown bearer <%s>\n", name); res = -EINVAL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 5ad70eff1ebf..d3eac56b8c21 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -39,8 +39,19 @@ #include "bcast.h" -#define MAX_BEARERS 8 -#define MAX_MEDIA 4 +#define MAX_BEARERS 2 +#define MAX_MEDIA 2 + +/* + * Identifiers associated with TIPC message header media address info + * + * - address info field is 20 bytes long + * - media type identifier located at offset 3 + * - remaining bytes vary according to media type + */ + +#define TIPC_MEDIA_ADDR_SIZE 20 +#define TIPC_MEDIA_TYPE_OFFSET 3 /* * Identifiers of supported TIPC media types @@ -48,27 +59,29 @@ #define TIPC_MEDIA_TYPE_ETH 1 /* - * Destination address structure used by TIPC bearers when sending messages - * - * IMPORTANT: The fields of this structure MUST be stored using the specified - * byte order indicated below, as the structure is exchanged between nodes - * as part of a link setup process. + * struct tipc_media_addr - destination address used by TIPC bearers + * @value: address info (format defined by media) + * @media_id: TIPC media type identifier + * @broadcast: non-zero if address is a broadcast address */ + struct tipc_media_addr { - __be32 type; /* bearer type (network byte order) */ - union { - __u8 eth_addr[6]; /* 48 bit Ethernet addr (byte array) */ - } dev_addr; + u8 value[TIPC_MEDIA_ADDR_SIZE]; + u8 media_id; + u8 broadcast; }; struct tipc_bearer; /** - * struct media - TIPC media information available to internal users + * struct tipc_media - TIPC media information available to internal users * @send_msg: routine which handles buffer transmission * @enable_bearer: routine which enables a bearer * @disable_bearer: routine which disables a bearer - * @addr2str: routine which converts bearer's address to string form + * @addr2str: routine which converts media address to string + * @str2addr: routine which converts media address from string + * @addr2msg: routine which converts media address to protocol message area + * @msg2addr: routine which converts media address from protocol message area * @bcast_addr: media address used in broadcasting * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure @@ -77,14 +90,16 @@ struct tipc_bearer; * @name: media name */ -struct media { +struct tipc_media { int (*send_msg)(struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); int (*enable_bearer)(struct tipc_bearer *b_ptr); void (*disable_bearer)(struct tipc_bearer *b_ptr); - char *(*addr2str)(struct tipc_media_addr *a, - char *str_buf, int str_size); + int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); + int (*str2addr)(struct tipc_media_addr *a, char *str_buf); + int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); + int (*msg2addr)(struct tipc_media_addr *a, char *msg_area); struct tipc_media_addr bcast_addr; u32 priority; u32 tolerance; @@ -103,6 +118,8 @@ struct media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @priority: default link priority for bearer + * @window: default window size for bearer + * @tolerance: default link tolerance for bearer * @identity: array index of this bearer within TIPC bearer array * @link_req: ptr to (optional) structure making periodic link setup requests * @links: list of non-congested links associated with bearer @@ -122,10 +139,12 @@ struct tipc_bearer { struct tipc_media_addr addr; /* initalized by media */ char name[TIPC_MAX_BEARER_NAME]; spinlock_t lock; - struct media *media; + struct tipc_media *media; u32 priority; + u32 window; + u32 tolerance; u32 identity; - struct link_req *link_req; + struct tipc_link_req *link_req; struct list_head links; struct list_head cong_links; int active; @@ -133,28 +152,19 @@ struct tipc_bearer { struct tipc_node_map nodes; }; -struct bearer_name { +struct tipc_bearer_names { char media_name[TIPC_MAX_MEDIA_NAME]; char if_name[TIPC_MAX_IF_NAME]; }; -struct link; +struct tipc_link; extern struct tipc_bearer tipc_bearers[]; /* * TIPC routines available to supported media types */ -int tipc_register_media(u32 media_type, - char *media_name, int (*enable)(struct tipc_bearer *), - void (*disable)(struct tipc_bearer *), - int (*send_msg)(struct sk_buff *, - struct tipc_bearer *, struct tipc_media_addr *), - char *(*addr2str)(struct tipc_media_addr *a, - char *str_buf, int str_size), - struct tipc_media_addr *bcast_addr, const u32 bearer_priority, - const u32 link_tolerance, /* [ms] */ - const u32 send_window_limit); +int tipc_register_media(struct tipc_media *m_ptr); void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); @@ -170,16 +180,21 @@ int tipc_disable_bearer(const char *name); int tipc_eth_media_start(void); void tipc_eth_media_stop(void); +int tipc_media_set_priority(const char *name, u32 new_value); +int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); struct sk_buff *tipc_bearer_get_names(void); void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); -void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr); +void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr); +struct tipc_bearer *tipc_bearer_find(const char *name); struct tipc_bearer *tipc_bearer_find_interface(const char *if_name); -int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr); -int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr); +struct tipc_media *tipc_media_find(const char *name); +int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, + struct tipc_link *l_ptr); +int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct tipc_link *l_ptr); void tipc_bearer_stop(void); void tipc_bearer_lock_push(struct tipc_bearer *b_ptr); diff --git a/net/tipc/config.c b/net/tipc/config.c index b25a396b7e1e..4785bf26cdf4 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -184,13 +184,12 @@ static struct sk_buff *cfg_set_own_addr(void) " (cannot change node address once assigned)"); /* - * Must release all spinlocks before calling start_net() because - * Linux version of TIPC calls eth_media_start() which calls - * register_netdevice_notifier() which may block! - * - * Temporarily releasing the lock should be harmless for non-Linux TIPC, - * but Linux version of eth_media_start() should really be reworked - * so that it can be called with spinlocks held. + * Must temporarily release configuration spinlock while switching into + * networking mode as it calls tipc_eth_media_start(), which may sleep. + * Releasing the lock is harmless as other locally-issued configuration + * commands won't occur until this one completes, and remotely-issued + * configuration commands can't be received until a local configuration + * command to enable the first bearer is received and processed. */ spin_unlock_bh(&config_lock); diff --git a/net/tipc/config.h b/net/tipc/config.h index 443159a166fd..80da6ebc2785 100644 --- a/net/tipc/config.h +++ b/net/tipc/config.h @@ -65,7 +65,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *req_tlv_area, int req_tlv_space, int headroom); -void tipc_cfg_link_event(u32 addr, char *name, int up); int tipc_cfg_init(void); void tipc_cfg_stop(void); diff --git a/net/tipc/core.c b/net/tipc/core.c index 943b6af84265..2691cd57b8a8 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -34,6 +34,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/module.h> + #include "core.h" #include "ref.h" #include "name_table.h" @@ -97,8 +99,8 @@ struct sk_buff *tipc_buf_acquire(u32 size) static void tipc_core_stop_net(void) { - tipc_eth_media_stop(); tipc_net_stop(); + tipc_eth_media_stop(); } /** diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 0987933155b9..a00e5f811569 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -45,7 +45,7 @@ /** - * struct link_req - information about an ongoing link setup request + * struct tipc_link_req - information about an ongoing link setup request * @bearer: bearer issuing requests * @dest: destination address for request messages * @domain: network domain to which links can be established @@ -54,7 +54,7 @@ * @timer: timer governing period between requests * @timer_intv: current interval between requests (in ms) */ -struct link_req { +struct tipc_link_req { struct tipc_bearer *bearer; struct tipc_media_addr dest; u32 domain; @@ -84,7 +84,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, msg_set_non_seq(msg, 1); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); - msg_set_media_addr(msg, &b_ptr->addr); + b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg)); } return buf; } @@ -120,7 +120,7 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) { struct tipc_node *n_ptr; - struct link *link; + struct tipc_link *link; struct tipc_media_addr media_addr, *addr; struct sk_buff *rbuf; struct tipc_msg *msg = buf_msg(buf); @@ -130,12 +130,15 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) u32 type = msg_type(msg); int link_fully_up; - msg_get_media_addr(msg, &media_addr); + media_addr.broadcast = 1; + b_ptr->media->msg2addr(&media_addr, msg_media_addr(msg)); buf_discard(buf); /* Validate discovery message from requesting node */ if (net_id != tipc_net_id) return; + if (media_addr.broadcast) + return; if (!tipc_addr_domain_valid(dest)) return; if (!tipc_addr_node_valid(orig)) @@ -159,12 +162,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) } tipc_node_lock(n_ptr); - /* Don't talk to neighbor during cleanup after last session */ - if (n_ptr->cleanup_required) { - tipc_node_unlock(n_ptr); - return; - } - link = n_ptr->links[b_ptr->identity]; /* Create a link endpoint for this bearer, if necessary */ @@ -221,7 +218,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) * and is either not currently searching or is searching at a slow rate */ -static void disc_update(struct link_req *req) +static void disc_update(struct tipc_link_req *req) { if (!req->num_nodes) { if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) || @@ -237,7 +234,7 @@ static void disc_update(struct link_req *req) * @req: ptr to link request structure */ -void tipc_disc_add_dest(struct link_req *req) +void tipc_disc_add_dest(struct tipc_link_req *req) { req->num_nodes++; } @@ -247,7 +244,7 @@ void tipc_disc_add_dest(struct link_req *req) * @req: ptr to link request structure */ -void tipc_disc_remove_dest(struct link_req *req) +void tipc_disc_remove_dest(struct tipc_link_req *req) { req->num_nodes--; disc_update(req); @@ -258,7 +255,7 @@ void tipc_disc_remove_dest(struct link_req *req) * @req: ptr to link request structure */ -static void disc_send_msg(struct link_req *req) +static void disc_send_msg(struct tipc_link_req *req) { if (!req->bearer->blocked) tipc_bearer_send(req->bearer, req->buf, &req->dest); @@ -271,7 +268,7 @@ static void disc_send_msg(struct link_req *req) * Called whenever a link setup request timer associated with a bearer expires. */ -static void disc_timeout(struct link_req *req) +static void disc_timeout(struct tipc_link_req *req) { int max_delay; @@ -319,7 +316,7 @@ exit: int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, u32 dest_domain) { - struct link_req *req; + struct tipc_link_req *req; req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) @@ -348,7 +345,7 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, * @req: ptr to link request structure */ -void tipc_disc_delete(struct link_req *req) +void tipc_disc_delete(struct tipc_link_req *req) { k_cancel_timer(&req->timer); k_term_timer(&req->timer); diff --git a/net/tipc/discover.h b/net/tipc/discover.h index a3af595b86cb..75b67c403aa3 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -37,13 +37,13 @@ #ifndef _TIPC_DISCOVER_H #define _TIPC_DISCOVER_H -struct link_req; +struct tipc_link_req; int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, u32 dest_domain); -void tipc_disc_delete(struct link_req *req); -void tipc_disc_add_dest(struct link_req *req); -void tipc_disc_remove_dest(struct link_req *req); +void tipc_disc_delete(struct tipc_link_req *req); +void tipc_disc_add_dest(struct tipc_link_req *req); +void tipc_disc_remove_dest(struct tipc_link_req *req); void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr); #endif diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index b69092eb95d8..527e3f0e165d 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -2,7 +2,7 @@ * net/tipc/eth_media.c: Ethernet bearer support for TIPC * * Copyright (c) 2001-2007, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2008, 2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,29 +37,46 @@ #include "core.h" #include "bearer.h" -#define MAX_ETH_BEARERS 2 -#define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI -#define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL -#define ETH_LINK_WINDOW TIPC_DEF_LINK_WIN +#define MAX_ETH_BEARERS MAX_BEARERS + +#define ETH_ADDR_OFFSET 4 /* message header offset of MAC address */ /** * struct eth_bearer - Ethernet bearer data structure * @bearer: ptr to associated "generic" bearer structure * @dev: ptr to associated Ethernet network device * @tipc_packet_type: used in binding TIPC to Ethernet driver + * @cleanup: work item used when disabling bearer */ struct eth_bearer { struct tipc_bearer *bearer; struct net_device *dev; struct packet_type tipc_packet_type; + struct work_struct cleanup; }; +static struct tipc_media eth_media_info; static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; static int eth_started; static struct notifier_block notifier; /** + * eth_media_addr_set - initialize Ethernet media address structure + * + * Media-dependent "value" field stores MAC address in first 6 bytes + * and zeroes out the remaining bytes. + */ + +static void eth_media_addr_set(struct tipc_media_addr *a, char *mac) +{ + memcpy(a->value, mac, ETH_ALEN); + memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN); + a->media_id = TIPC_MEDIA_TYPE_ETH; + a->broadcast = !memcmp(mac, eth_media_info.bcast_addr.value, ETH_ALEN); +} + +/** * send_msg - send a TIPC message out over an Ethernet interface */ @@ -85,7 +102,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, skb_reset_network_header(clone); clone->dev = dev; - dev_hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr, + dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, dev->dev_addr, clone->len); dev_queue_xmit(clone); return 0; @@ -144,31 +161,27 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Find device with specified name */ + read_lock(&dev_base_lock); for_each_netdev(&init_net, pdev) { if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { dev = pdev; + dev_hold(dev); break; } } + read_unlock(&dev_base_lock); if (!dev) return -ENODEV; - /* Find Ethernet bearer for device (or create one) */ - - while ((eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev)) - eb_ptr++; - if (eb_ptr == stop) - return -EDQUOT; - if (!eb_ptr->dev) { - eb_ptr->dev = dev; - eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); - eb_ptr->tipc_packet_type.dev = dev; - eb_ptr->tipc_packet_type.func = recv_msg; - eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; - INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); - dev_hold(dev); - dev_add_pack(&eb_ptr->tipc_packet_type); - } + /* Create Ethernet bearer for device */ + + eb_ptr->dev = dev; + eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); + eb_ptr->tipc_packet_type.dev = dev; + eb_ptr->tipc_packet_type.func = recv_msg; + eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; + INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); + dev_add_pack(&eb_ptr->tipc_packet_type); /* Associate TIPC bearer with Ethernet bearer */ @@ -176,22 +189,41 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) tb_ptr->usr_handle = (void *)eb_ptr; tb_ptr->mtu = dev->mtu; tb_ptr->blocked = 0; - tb_ptr->addr.type = htonl(TIPC_MEDIA_TYPE_ETH); - memcpy(&tb_ptr->addr.dev_addr, dev->dev_addr, ETH_ALEN); + eth_media_addr_set(&tb_ptr->addr, (char *)dev->dev_addr); return 0; } /** + * cleanup_bearer - break association between Ethernet bearer and interface + * + * This routine must be invoked from a work queue because it can sleep. + */ + +static void cleanup_bearer(struct work_struct *work) +{ + struct eth_bearer *eb_ptr = + container_of(work, struct eth_bearer, cleanup); + + dev_remove_pack(&eb_ptr->tipc_packet_type); + dev_put(eb_ptr->dev); + eb_ptr->dev = NULL; +} + +/** * disable_bearer - detach TIPC bearer from an Ethernet interface * - * We really should do dev_remove_pack() here, but this function can not be - * called at tasklet level. => Use eth_bearer->bearer as a flag to throw away - * incoming buffers, & postpone dev_remove_pack() to eth_media_stop() on exit. + * Mark Ethernet bearer as inactive so that incoming buffers are thrown away, + * then get worker thread to complete bearer cleanup. (Can't do cleanup + * here because cleanup code needs to sleep and caller holds spinlocks.) */ static void disable_bearer(struct tipc_bearer *tb_ptr) { - ((struct eth_bearer *)tb_ptr->usr_handle)->bearer = NULL; + struct eth_bearer *eb_ptr = (struct eth_bearer *)tb_ptr->usr_handle; + + eb_ptr->bearer = NULL; + INIT_WORK(&eb_ptr->cleanup, cleanup_bearer); + schedule_work(&eb_ptr->cleanup); } /** @@ -250,17 +282,81 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt, * eth_addr2str - convert Ethernet address to string */ -static char *eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +{ + if (str_size < 18) /* 18 = strlen("aa:bb:cc:dd:ee:ff\0") */ + return 1; + + sprintf(str_buf, "%pM", a->value); + return 0; +} + +/** + * eth_str2addr - convert string to Ethernet address + */ + +static int eth_str2addr(struct tipc_media_addr *a, char *str_buf) +{ + char mac[ETH_ALEN]; + int r; + + r = sscanf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x", + (u32 *)&mac[0], (u32 *)&mac[1], (u32 *)&mac[2], + (u32 *)&mac[3], (u32 *)&mac[4], (u32 *)&mac[5]); + + if (r != ETH_ALEN) + return 1; + + eth_media_addr_set(a, mac); + return 0; +} + +/** + * eth_str2addr - convert Ethernet address format to message header format + */ + +static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) +{ + memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); + msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; + memcpy(msg_area + ETH_ADDR_OFFSET, a->value, ETH_ALEN); + return 0; +} + +/** + * eth_str2addr - convert message header address format to Ethernet format + */ + +static int eth_msg2addr(struct tipc_media_addr *a, char *msg_area) { - unchar *addr = (unchar *)&a->dev_addr; + if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) + return 1; - if (str_size < 18) - *str_buf = '\0'; - else - sprintf(str_buf, "%pM", addr); - return str_buf; + eth_media_addr_set(a, msg_area + ETH_ADDR_OFFSET); + return 0; } +/* + * Ethernet media registration info + */ + +static struct tipc_media eth_media_info = { + .send_msg = send_msg, + .enable_bearer = enable_bearer, + .disable_bearer = disable_bearer, + .addr2str = eth_addr2str, + .str2addr = eth_str2addr, + .addr2msg = eth_addr2msg, + .msg2addr = eth_msg2addr, + .bcast_addr = { { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + TIPC_MEDIA_TYPE_ETH, 1 }, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_ETH, + .name = "eth" +}; + /** * tipc_eth_media_start - activate Ethernet bearer support * @@ -270,21 +366,12 @@ static char *eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size int tipc_eth_media_start(void) { - struct tipc_media_addr bcast_addr; int res; if (eth_started) return -EINVAL; - bcast_addr.type = htonl(TIPC_MEDIA_TYPE_ETH); - memset(&bcast_addr.dev_addr, 0xff, ETH_ALEN); - - memset(eth_bearers, 0, sizeof(eth_bearers)); - - res = tipc_register_media(TIPC_MEDIA_TYPE_ETH, "eth", - enable_bearer, disable_bearer, send_msg, - eth_addr2str, &bcast_addr, ETH_LINK_PRIORITY, - ETH_LINK_TOLERANCE, ETH_LINK_WINDOW); + res = tipc_register_media(ð_media_info); if (res) return res; @@ -302,22 +389,10 @@ int tipc_eth_media_start(void) void tipc_eth_media_stop(void) { - int i; - if (!eth_started) return; + flush_scheduled_work(); unregister_netdevice_notifier(¬ifier); - for (i = 0; i < MAX_ETH_BEARERS ; i++) { - if (eth_bearers[i].bearer) { - eth_bearers[i].bearer->blocked = 1; - eth_bearers[i].bearer = NULL; - } - if (eth_bearers[i].dev) { - dev_remove_pack(ð_bearers[i].tipc_packet_type); - dev_put(eth_bearers[i].dev); - } - } - memset(ð_bearers, 0, sizeof(eth_bearers)); eth_started = 0; } diff --git a/net/tipc/link.c b/net/tipc/link.c index f89570c54f54..ac1832a66f8a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -71,35 +71,36 @@ #define START_CHANGEOVER 100000u /** - * struct link_name - deconstructed link name + * struct tipc_link_name - deconstructed link name * @addr_local: network address of node at this end * @if_local: name of interface at this end * @addr_peer: network address of node at far end * @if_peer: name of interface at far end */ -struct link_name { +struct tipc_link_name { u32 addr_local; char if_local[TIPC_MAX_IF_NAME]; u32 addr_peer; char if_peer[TIPC_MAX_IF_NAME]; }; -static void link_handle_out_of_seq_msg(struct link *l_ptr, +static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, struct sk_buff *buf); -static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf); -static int link_recv_changeover_msg(struct link **l_ptr, struct sk_buff **buf); -static void link_set_supervision_props(struct link *l_ptr, u32 tolerance); +static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf); +static int link_recv_changeover_msg(struct tipc_link **l_ptr, + struct sk_buff **buf); +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, u32 num_sect, unsigned int total_len, u32 destnode); -static void link_check_defragm_bufs(struct link *l_ptr); -static void link_state_event(struct link *l_ptr, u32 event); -static void link_reset_statistics(struct link *l_ptr); -static void link_print(struct link *l_ptr, const char *str); -static void link_start(struct link *l_ptr); -static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf); +static void link_check_defragm_bufs(struct tipc_link *l_ptr); +static void link_state_event(struct tipc_link *l_ptr, u32 event); +static void link_reset_statistics(struct tipc_link *l_ptr); +static void link_print(struct tipc_link *l_ptr, const char *str); +static void link_start(struct tipc_link *l_ptr); +static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf); /* * Simple link routines @@ -110,7 +111,7 @@ static unsigned int align(unsigned int i) return (i + 3) & ~3u; } -static void link_init_max_pkt(struct link *l_ptr) +static void link_init_max_pkt(struct tipc_link *l_ptr) { u32 max_pkt; @@ -127,14 +128,14 @@ static void link_init_max_pkt(struct link *l_ptr) l_ptr->max_pkt_probes = 0; } -static u32 link_next_sent(struct link *l_ptr) +static u32 link_next_sent(struct tipc_link *l_ptr) { if (l_ptr->next_out) - return msg_seqno(buf_msg(l_ptr->next_out)); + return buf_seqno(l_ptr->next_out); return mod(l_ptr->next_out_no); } -static u32 link_last_sent(struct link *l_ptr) +static u32 link_last_sent(struct tipc_link *l_ptr) { return mod(link_next_sent(l_ptr) - 1); } @@ -143,28 +144,29 @@ static u32 link_last_sent(struct link *l_ptr) * Simple non-static link routines (i.e. referenced outside this file) */ -int tipc_link_is_up(struct link *l_ptr) +int tipc_link_is_up(struct tipc_link *l_ptr) { if (!l_ptr) return 0; return link_working_working(l_ptr) || link_working_unknown(l_ptr); } -int tipc_link_is_active(struct link *l_ptr) +int tipc_link_is_active(struct tipc_link *l_ptr) { return (l_ptr->owner->active_links[0] == l_ptr) || (l_ptr->owner->active_links[1] == l_ptr); } /** - * link_name_validate - validate & (optionally) deconstruct link name + * link_name_validate - validate & (optionally) deconstruct tipc_link name * @name - ptr to link name string * @name_parts - ptr to area for link name components (or NULL if not needed) * * Returns 1 if link name is valid, otherwise 0. */ -static int link_name_validate(const char *name, struct link_name *name_parts) +static int link_name_validate(const char *name, + struct tipc_link_name *name_parts) { char name_copy[TIPC_MAX_LINK_NAME]; char *addr_local; @@ -238,7 +240,7 @@ static int link_name_validate(const char *name, struct link_name *name_parts) * tipc_node_delete() is called.) */ -static void link_timeout(struct link *l_ptr) +static void link_timeout(struct tipc_link *l_ptr) { tipc_node_lock(l_ptr->owner); @@ -287,7 +289,7 @@ static void link_timeout(struct link *l_ptr) tipc_node_unlock(l_ptr->owner); } -static void link_set_timer(struct link *l_ptr, u32 time) +static void link_set_timer(struct tipc_link *l_ptr, u32 time) { k_start_timer(&l_ptr->timer, time); } @@ -301,11 +303,11 @@ static void link_set_timer(struct link *l_ptr, u32 time) * Returns pointer to link. */ -struct link *tipc_link_create(struct tipc_node *n_ptr, +struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr) { - struct link *l_ptr; + struct tipc_link *l_ptr; struct tipc_msg *msg; char *if_name; char addr_string[16]; @@ -332,17 +334,18 @@ struct link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->addr = peer; if_name = strchr(b_ptr->name, ':') + 1; - sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:", + sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), tipc_node(tipc_own_addr), if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); - /* note: peer i/f is appended to link name by reset/activate */ + /* note: peer i/f name is updated by reset/activate message */ memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); l_ptr->owner = n_ptr; l_ptr->checkpoint = 1; + l_ptr->peer_session = INVALID_SESSION; l_ptr->b_ptr = b_ptr; - link_set_supervision_props(l_ptr, b_ptr->media->tolerance); + link_set_supervision_props(l_ptr, b_ptr->tolerance); l_ptr->state = RESET_UNKNOWN; l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; @@ -354,7 +357,7 @@ struct link *tipc_link_create(struct tipc_node *n_ptr, strcpy((char *)msg_data(msg), if_name); l_ptr->priority = b_ptr->priority; - tipc_link_set_queue_limits(l_ptr, b_ptr->media->window); + tipc_link_set_queue_limits(l_ptr, b_ptr->window); link_init_max_pkt(l_ptr); @@ -381,7 +384,7 @@ struct link *tipc_link_create(struct tipc_node *n_ptr, * to avoid a potential deadlock situation. */ -void tipc_link_delete(struct link *l_ptr) +void tipc_link_delete(struct tipc_link *l_ptr) { if (!l_ptr) { err("Attempt to delete non-existent link\n"); @@ -400,7 +403,7 @@ void tipc_link_delete(struct link *l_ptr) kfree(l_ptr); } -static void link_start(struct link *l_ptr) +static void link_start(struct tipc_link *l_ptr) { tipc_node_lock(l_ptr->owner); link_state_event(l_ptr, STARTING_EVT); @@ -417,7 +420,7 @@ static void link_start(struct link *l_ptr) * has abated. */ -static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz) +static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz) { struct tipc_port *p_ptr; @@ -439,7 +442,7 @@ exit: return -ELINKCONG; } -void tipc_link_wakeup_ports(struct link *l_ptr, int all) +void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) { struct tipc_port *p_ptr; struct tipc_port *temp_p_ptr; @@ -474,7 +477,7 @@ exit: * @l_ptr: pointer to link */ -static void link_release_outqueue(struct link *l_ptr) +static void link_release_outqueue(struct tipc_link *l_ptr) { struct sk_buff *buf = l_ptr->first_out; struct sk_buff *next; @@ -493,7 +496,7 @@ static void link_release_outqueue(struct link *l_ptr) * @l_ptr: pointer to link */ -void tipc_link_reset_fragments(struct link *l_ptr) +void tipc_link_reset_fragments(struct tipc_link *l_ptr) { struct sk_buff *buf = l_ptr->defragm_buf; struct sk_buff *next; @@ -511,7 +514,7 @@ void tipc_link_reset_fragments(struct link *l_ptr) * @l_ptr: pointer to link */ -void tipc_link_stop(struct link *l_ptr) +void tipc_link_stop(struct tipc_link *l_ptr) { struct sk_buff *buf; struct sk_buff *next; @@ -536,10 +539,7 @@ void tipc_link_stop(struct link *l_ptr) l_ptr->proto_msg_queue = NULL; } -/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */ -#define link_send_event(fcn, l_ptr, up) do { } while (0) - -void tipc_link_reset(struct link *l_ptr) +void tipc_link_reset(struct tipc_link *l_ptr) { struct sk_buff *buf; u32 prev_state = l_ptr->state; @@ -596,21 +596,14 @@ void tipc_link_reset(struct link *l_ptr) l_ptr->fsm_msg_cnt = 0; l_ptr->stale_count = 0; link_reset_statistics(l_ptr); - - link_send_event(tipc_cfg_link_event, l_ptr, 0); - if (!in_own_cluster(l_ptr->addr)) - link_send_event(tipc_disc_link_event, l_ptr, 0); } -static void link_activate(struct link *l_ptr) +static void link_activate(struct tipc_link *l_ptr) { l_ptr->next_in_no = l_ptr->stats.recv_info = 1; tipc_node_link_up(l_ptr->owner, l_ptr); tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); - link_send_event(tipc_cfg_link_event, l_ptr, 1); - if (!in_own_cluster(l_ptr->addr)) - link_send_event(tipc_disc_link_event, l_ptr, 1); } /** @@ -619,9 +612,9 @@ static void link_activate(struct link *l_ptr) * @event: state machine event to process */ -static void link_state_event(struct link *l_ptr, unsigned event) +static void link_state_event(struct tipc_link *l_ptr, unsigned event) { - struct link *other; + struct tipc_link *other; u32 cont_intv = l_ptr->continuity_interval; if (!l_ptr->started && (event != STARTING_EVT)) @@ -793,7 +786,7 @@ static void link_state_event(struct link *l_ptr, unsigned event) * the tail of an existing one. */ -static int link_bundle_buf(struct link *l_ptr, +static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler, struct sk_buff *buf) { @@ -822,7 +815,7 @@ static int link_bundle_buf(struct link *l_ptr, return 1; } -static void link_add_to_outqueue(struct link *l_ptr, +static void link_add_to_outqueue(struct tipc_link *l_ptr, struct sk_buff *buf, struct tipc_msg *msg) { @@ -843,7 +836,7 @@ static void link_add_to_outqueue(struct link *l_ptr, l_ptr->stats.max_queue_sz = l_ptr->out_queue_size; } -static void link_add_chain_to_outqueue(struct link *l_ptr, +static void link_add_chain_to_outqueue(struct tipc_link *l_ptr, struct sk_buff *buf_chain, u32 long_msgno) { @@ -868,7 +861,7 @@ static void link_add_chain_to_outqueue(struct link *l_ptr, * has failed, and from link_send() */ -int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) +int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); u32 size = msg_size(msg); @@ -963,7 +956,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) { - struct link *l_ptr; + struct tipc_link *l_ptr; struct tipc_node *n_ptr; int res = -ELINKCONG; @@ -985,13 +978,58 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) } /* + * tipc_link_send_names - send name table entries to new neighbor + * + * Send routine for bulk delivery of name table messages when contact + * with a new neighbor occurs. No link congestion checking is performed + * because name table messages *must* be delivered. The messages must be + * small enough not to require fragmentation. + * Called without any locks held. + */ + +void tipc_link_send_names(struct list_head *message_list, u32 dest) +{ + struct tipc_node *n_ptr; + struct tipc_link *l_ptr; + struct sk_buff *buf; + struct sk_buff *temp_buf; + + if (list_empty(message_list)) + return; + + read_lock_bh(&tipc_net_lock); + n_ptr = tipc_node_find(dest); + if (n_ptr) { + tipc_node_lock(n_ptr); + l_ptr = n_ptr->active_links[0]; + if (l_ptr) { + /* convert circular list to linear list */ + ((struct sk_buff *)message_list->prev)->next = NULL; + link_add_chain_to_outqueue(l_ptr, + (struct sk_buff *)message_list->next, 0); + tipc_link_push_queue(l_ptr); + INIT_LIST_HEAD(message_list); + } + tipc_node_unlock(n_ptr); + } + read_unlock_bh(&tipc_net_lock); + + /* discard the messages if they couldn't be sent */ + + list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) { + list_del((struct list_head *)buf); + buf_discard(buf); + } +} + +/* * link_send_buf_fast: Entry for data messages where the * destination link is known and the header is complete, * inclusive total message length. Very time critical. * Link is locked. Returns user data length. */ -static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf, +static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, u32 *used_max_pkt) { struct tipc_msg *msg = buf_msg(buf); @@ -1025,15 +1063,12 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf, */ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) { - struct link *l_ptr; + struct tipc_link *l_ptr; struct tipc_node *n_ptr; int res; u32 selector = msg_origport(buf_msg(buf)) & 1; u32 dummy; - if (destnode == tipc_own_addr) - return tipc_port_recv_msg(buf); - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(destnode); if (likely(n_ptr)) { @@ -1067,7 +1102,7 @@ int tipc_link_send_sections_fast(struct tipc_port *sender, u32 destaddr) { struct tipc_msg *hdr = &sender->phdr; - struct link *l_ptr; + struct tipc_link *l_ptr; struct sk_buff *buf; struct tipc_node *node; int res; @@ -1162,7 +1197,7 @@ static int link_send_sections_long(struct tipc_port *sender, unsigned int total_len, u32 destaddr) { - struct link *l_ptr; + struct tipc_link *l_ptr; struct tipc_node *node; struct tipc_msg *hdr = &sender->phdr; u32 dsz = total_len; @@ -1309,7 +1344,7 @@ reject: /* * tipc_link_push_packet: Push one unsent packet to the media */ -u32 tipc_link_push_packet(struct link *l_ptr) +u32 tipc_link_push_packet(struct tipc_link *l_ptr) { struct sk_buff *buf = l_ptr->first_out; u32 r_q_size = l_ptr->retransm_queue_size; @@ -1321,7 +1356,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) if (r_q_size && buf) { u32 last = lesser(mod(r_q_head + r_q_size), link_last_sent(l_ptr)); - u32 first = msg_seqno(buf_msg(buf)); + u32 first = buf_seqno(buf); while (buf && less(first, r_q_head)) { first = mod(first + 1); @@ -1370,7 +1405,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) if (buf) { struct tipc_msg *msg = buf_msg(buf); u32 next = msg_seqno(msg); - u32 first = msg_seqno(buf_msg(l_ptr->first_out)); + u32 first = buf_seqno(l_ptr->first_out); if (mod(next - first) < l_ptr->queue_limit[0]) { msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); @@ -1393,7 +1428,7 @@ u32 tipc_link_push_packet(struct link *l_ptr) * push_queue(): push out the unsent messages of a link where * congestion has abated. Node is locked */ -void tipc_link_push_queue(struct link *l_ptr) +void tipc_link_push_queue(struct tipc_link *l_ptr) { u32 res; @@ -1437,7 +1472,8 @@ static void link_reset_all(unsigned long addr) read_unlock_bh(&tipc_net_lock); } -static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) +static void link_retransmit_failure(struct tipc_link *l_ptr, + struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); @@ -1481,7 +1517,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) } } -void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, +void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf, u32 retransmits) { struct tipc_msg *msg; @@ -1525,7 +1561,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, } else { tipc_bearer_schedule(l_ptr->b_ptr, l_ptr); l_ptr->stats.bearer_congs++; - l_ptr->retransm_queue_head = msg_seqno(buf_msg(buf)); + l_ptr->retransm_queue_head = buf_seqno(buf); l_ptr->retransm_queue_size = retransmits; return; } @@ -1538,7 +1574,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, * link_insert_deferred_queue - insert deferred messages back into receive chain */ -static struct sk_buff *link_insert_deferred_queue(struct link *l_ptr, +static struct sk_buff *link_insert_deferred_queue(struct tipc_link *l_ptr, struct sk_buff *buf) { u32 seq_no; @@ -1546,7 +1582,7 @@ static struct sk_buff *link_insert_deferred_queue(struct link *l_ptr, if (l_ptr->oldest_deferred_in == NULL) return buf; - seq_no = msg_seqno(buf_msg(l_ptr->oldest_deferred_in)); + seq_no = buf_seqno(l_ptr->oldest_deferred_in); if (seq_no == mod(l_ptr->next_in_no)) { l_ptr->newest_deferred_in->next = buf; buf = l_ptr->oldest_deferred_in; @@ -1620,7 +1656,7 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) read_lock_bh(&tipc_net_lock); while (head) { struct tipc_node *n_ptr; - struct link *l_ptr; + struct tipc_link *l_ptr; struct sk_buff *crs; struct sk_buff *buf = head; struct tipc_msg *msg; @@ -1658,19 +1694,12 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) continue; } + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(msg) && (msg_destnode(msg) != tipc_own_addr))) goto cont; - /* Discard non-routeable messages destined for another node */ - - if (unlikely(!msg_isdata(msg) && - (msg_destnode(msg) != tipc_own_addr))) { - if ((msg_user(msg) != CONN_MANAGER) && - (msg_user(msg) != MSG_FRAGMENTER)) - goto cont; - } - /* Locate neighboring node that sent message */ n_ptr = tipc_node_find(msg_prevnode(msg)); @@ -1678,17 +1707,24 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) goto cont; tipc_node_lock(n_ptr); - /* Don't talk to neighbor during cleanup after last session */ + /* Locate unicast link endpoint that should handle message */ - if (n_ptr->cleanup_required) { + l_ptr = n_ptr->links[b_ptr->identity]; + if (unlikely(!l_ptr)) { tipc_node_unlock(n_ptr); goto cont; } - /* Locate unicast link endpoint that should handle message */ + /* Verify that communication with node is currently allowed */ - l_ptr = n_ptr->links[b_ptr->identity]; - if (unlikely(!l_ptr)) { + if ((n_ptr->block_setup & WAIT_PEER_DOWN) && + msg_user(msg) == LINK_PROTOCOL && + (msg_type(msg) == RESET_MSG || + msg_type(msg) == ACTIVATE_MSG) && + !msg_redundant_link(msg)) + n_ptr->block_setup &= ~WAIT_PEER_DOWN; + + if (n_ptr->block_setup) { tipc_node_unlock(n_ptr); goto cont; } @@ -1700,14 +1736,12 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) /* Release acked messages */ - if (less(n_ptr->bclink.acked, msg_bcast_ack(msg))) { - if (tipc_node_is_up(n_ptr) && n_ptr->bclink.supported) - tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); - } + if (tipc_node_is_up(n_ptr) && n_ptr->bclink.supported) + tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); crs = l_ptr->first_out; while ((crs != l_ptr->next_out) && - less_eq(msg_seqno(buf_msg(crs)), ackd)) { + less_eq(buf_seqno(crs), ackd)) { struct sk_buff *next = crs->next; buf_discard(crs); @@ -1830,7 +1864,7 @@ u32 tipc_link_defer_pkt(struct sk_buff **head, { struct sk_buff *prev = NULL; struct sk_buff *crs = *head; - u32 seq_no = msg_seqno(buf_msg(buf)); + u32 seq_no = buf_seqno(buf); buf->next = NULL; @@ -1841,7 +1875,7 @@ u32 tipc_link_defer_pkt(struct sk_buff **head, } /* Last ? */ - if (less(msg_seqno(buf_msg(*tail)), seq_no)) { + if (less(buf_seqno(*tail), seq_no)) { (*tail)->next = buf; *tail = buf; return 1; @@ -1875,10 +1909,10 @@ u32 tipc_link_defer_pkt(struct sk_buff **head, * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet */ -static void link_handle_out_of_seq_msg(struct link *l_ptr, +static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, struct sk_buff *buf) { - u32 seq_no = msg_seqno(buf_msg(buf)); + u32 seq_no = buf_seqno(buf); if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) { link_recv_proto_msg(l_ptr, buf); @@ -1913,8 +1947,9 @@ static void link_handle_out_of_seq_msg(struct link *l_ptr, /* * Send protocol message to the other endpoint. */ -void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority, u32 ack_mtu) +void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, + int probe_msg, u32 gap, u32 tolerance, + u32 priority, u32 ack_mtu) { struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; @@ -1923,6 +1958,12 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, if (link_blocked(l_ptr)) return; + + /* Abort non-RESET send if communication with node is prohibited */ + + if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG)) + return; + msg_set_type(msg, msg_typ); msg_set_net_plane(msg, l_ptr->b_ptr->net_plane); msg_set_bcast_ack(msg, mod(l_ptr->owner->bclink.last_in)); @@ -1934,10 +1975,10 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, if (!tipc_link_is_up(l_ptr)) return; if (l_ptr->next_out) - next_sent = msg_seqno(buf_msg(l_ptr->next_out)); + next_sent = buf_seqno(l_ptr->next_out); msg_set_next_sent(msg, next_sent); if (l_ptr->oldest_deferred_in) { - u32 rec = msg_seqno(buf_msg(l_ptr->oldest_deferred_in)); + u32 rec = buf_seqno(l_ptr->oldest_deferred_in); gap = mod(rec - mod(l_ptr->next_in_no)); } msg_set_seq_gap(msg, gap); @@ -2025,7 +2066,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, * change at any time. The node with lowest address rules */ -static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) +static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf) { u32 rec_gap = 0; u32 max_pkt_info; @@ -2051,9 +2092,19 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) case RESET_MSG: if (!link_working_unknown(l_ptr) && (l_ptr->peer_session != INVALID_SESSION)) { - if (msg_session(msg) == l_ptr->peer_session) - break; /* duplicate: ignore */ + if (less_eq(msg_session(msg), l_ptr->peer_session)) + break; /* duplicate or old reset: ignore */ + } + + if (!msg_redundant_link(msg) && (link_working_working(l_ptr) || + link_working_unknown(l_ptr))) { + /* + * peer has lost contact -- don't allow peer's links + * to reactivate before we recognize loss & clean up + */ + l_ptr->owner->block_setup = WAIT_NODE_DOWN; } + /* fall thru' */ case ACTIVATE_MSG: /* Update link settings according other endpoint's values */ @@ -2148,12 +2199,12 @@ exit: * tipc_link_tunnel(): Send one message via a link belonging to * another bearer. Owner node is locked. */ -static void tipc_link_tunnel(struct link *l_ptr, +static void tipc_link_tunnel(struct tipc_link *l_ptr, struct tipc_msg *tunnel_hdr, struct tipc_msg *msg, u32 selector) { - struct link *tunnel; + struct tipc_link *tunnel; struct sk_buff *buf; u32 length = msg_size(msg); @@ -2182,11 +2233,11 @@ static void tipc_link_tunnel(struct link *l_ptr, * Owner node is locked. */ -void tipc_link_changeover(struct link *l_ptr) +void tipc_link_changeover(struct tipc_link *l_ptr) { u32 msgcount = l_ptr->out_queue_size; struct sk_buff *crs = l_ptr->first_out; - struct link *tunnel = l_ptr->owner->active_links[0]; + struct tipc_link *tunnel = l_ptr->owner->active_links[0]; struct tipc_msg tunnel_hdr; int split_bundles; @@ -2245,7 +2296,7 @@ void tipc_link_changeover(struct link *l_ptr) } } -void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel) +void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *tunnel) { struct sk_buff *iter; struct tipc_msg tunnel_hdr; @@ -2309,11 +2360,11 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) * via other link. Node is locked. Return extracted buffer. */ -static int link_recv_changeover_msg(struct link **l_ptr, +static int link_recv_changeover_msg(struct tipc_link **l_ptr, struct sk_buff **buf) { struct sk_buff *tunnel_buf = *buf; - struct link *dest_link; + struct tipc_link *dest_link; struct tipc_msg *msg; struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); u32 msg_typ = msg_type(tunnel_msg); @@ -2413,7 +2464,7 @@ void tipc_link_recv_bundle(struct sk_buff *buf) * The buffer is complete, inclusive total message length. * Returns user data length. */ -static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) +static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf) { struct sk_buff *buf_chain = NULL; struct sk_buff *buf_chain_tail = (struct sk_buff *)&buf_chain; @@ -2542,7 +2593,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, /* Is there an incomplete message waiting for this fragment? */ - while (pbuf && ((msg_seqno(buf_msg(pbuf)) != long_msg_seq_no) || + while (pbuf && ((buf_seqno(pbuf) != long_msg_seq_no) || (msg_orignode(fragm) != msg_orignode(buf_msg(pbuf))))) { prev = pbuf; pbuf = pbuf->next; @@ -2609,7 +2660,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, * @l_ptr: pointer to link */ -static void link_check_defragm_bufs(struct link *l_ptr) +static void link_check_defragm_bufs(struct tipc_link *l_ptr) { struct sk_buff *prev = NULL; struct sk_buff *next = NULL; @@ -2639,7 +2690,7 @@ static void link_check_defragm_bufs(struct link *l_ptr) -static void link_set_supervision_props(struct link *l_ptr, u32 tolerance) +static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance) { if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) return; @@ -2651,7 +2702,7 @@ static void link_set_supervision_props(struct link *l_ptr, u32 tolerance) } -void tipc_link_set_queue_limits(struct link *l_ptr, u32 window) +void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) { /* Data messages from this node, inclusive FIRST_FRAGM */ l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window; @@ -2681,11 +2732,12 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window) * Returns pointer to link (or 0 if invalid link name). */ -static struct link *link_find_link(const char *name, struct tipc_node **node) +static struct tipc_link *link_find_link(const char *name, + struct tipc_node **node) { - struct link_name link_name_parts; + struct tipc_link_name link_name_parts; struct tipc_bearer *b_ptr; - struct link *l_ptr; + struct tipc_link *l_ptr; if (!link_name_validate(name, &link_name_parts)) return NULL; @@ -2705,13 +2757,113 @@ static struct link *link_find_link(const char *name, struct tipc_node **node) return l_ptr; } +/** + * link_value_is_valid -- validate proposed link tolerance/priority/window + * + * @cmd - value type (TIPC_CMD_SET_LINK_*) + * @new_value - the new value + * + * Returns 1 if value is within range, 0 if not. + */ + +static int link_value_is_valid(u16 cmd, u32 new_value) +{ + switch (cmd) { + case TIPC_CMD_SET_LINK_TOL: + return (new_value >= TIPC_MIN_LINK_TOL) && + (new_value <= TIPC_MAX_LINK_TOL); + case TIPC_CMD_SET_LINK_PRI: + return (new_value <= TIPC_MAX_LINK_PRI); + case TIPC_CMD_SET_LINK_WINDOW: + return (new_value >= TIPC_MIN_LINK_WIN) && + (new_value <= TIPC_MAX_LINK_WIN); + } + return 0; +} + + +/** + * link_cmd_set_value - change priority/tolerance/window for link/bearer/media + * @name - ptr to link, bearer, or media name + * @new_value - new value of link, bearer, or media setting + * @cmd - which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*) + * + * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted. + * + * Returns 0 if value updated and negative value on error. + */ + +static int link_cmd_set_value(const char *name, u32 new_value, u16 cmd) +{ + struct tipc_node *node; + struct tipc_link *l_ptr; + struct tipc_bearer *b_ptr; + struct tipc_media *m_ptr; + + l_ptr = link_find_link(name, &node); + if (l_ptr) { + /* + * acquire node lock for tipc_link_send_proto_msg(). + * see "TIPC locking policy" in net.c. + */ + tipc_node_lock(node); + switch (cmd) { + case TIPC_CMD_SET_LINK_TOL: + link_set_supervision_props(l_ptr, new_value); + tipc_link_send_proto_msg(l_ptr, + STATE_MSG, 0, 0, new_value, 0, 0); + break; + case TIPC_CMD_SET_LINK_PRI: + l_ptr->priority = new_value; + tipc_link_send_proto_msg(l_ptr, + STATE_MSG, 0, 0, 0, new_value, 0); + break; + case TIPC_CMD_SET_LINK_WINDOW: + tipc_link_set_queue_limits(l_ptr, new_value); + break; + } + tipc_node_unlock(node); + return 0; + } + + b_ptr = tipc_bearer_find(name); + if (b_ptr) { + switch (cmd) { + case TIPC_CMD_SET_LINK_TOL: + b_ptr->tolerance = new_value; + return 0; + case TIPC_CMD_SET_LINK_PRI: + b_ptr->priority = new_value; + return 0; + case TIPC_CMD_SET_LINK_WINDOW: + b_ptr->window = new_value; + return 0; + } + return -EINVAL; + } + + m_ptr = tipc_media_find(name); + if (!m_ptr) + return -ENODEV; + switch (cmd) { + case TIPC_CMD_SET_LINK_TOL: + m_ptr->tolerance = new_value; + return 0; + case TIPC_CMD_SET_LINK_PRI: + m_ptr->priority = new_value; + return 0; + case TIPC_CMD_SET_LINK_WINDOW: + m_ptr->window = new_value; + return 0; + } + return -EINVAL; +} + struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd) { struct tipc_link_config *args; u32 new_value; - struct link *l_ptr; - struct tipc_node *node; int res; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_CONFIG)) @@ -2720,6 +2872,10 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space args = (struct tipc_link_config *)TLV_DATA(req_tlv_area); new_value = ntohl(args->value); + if (!link_value_is_valid(cmd, new_value)) + return tipc_cfg_reply_error_string( + "cannot change, value invalid"); + if (!strcmp(args->name, tipc_bclink_name)) { if ((cmd == TIPC_CMD_SET_LINK_WINDOW) && (tipc_bclink_set_queue_limits(new_value) == 0)) @@ -2729,43 +2885,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space } read_lock_bh(&tipc_net_lock); - l_ptr = link_find_link(args->name, &node); - if (!l_ptr) { - read_unlock_bh(&tipc_net_lock); - return tipc_cfg_reply_error_string("link not found"); - } - - tipc_node_lock(node); - res = -EINVAL; - switch (cmd) { - case TIPC_CMD_SET_LINK_TOL: - if ((new_value >= TIPC_MIN_LINK_TOL) && - (new_value <= TIPC_MAX_LINK_TOL)) { - link_set_supervision_props(l_ptr, new_value); - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 0, 0, new_value, 0, 0); - res = 0; - } - break; - case TIPC_CMD_SET_LINK_PRI: - if ((new_value >= TIPC_MIN_LINK_PRI) && - (new_value <= TIPC_MAX_LINK_PRI)) { - l_ptr->priority = new_value; - tipc_link_send_proto_msg(l_ptr, STATE_MSG, - 0, 0, 0, new_value, 0); - res = 0; - } - break; - case TIPC_CMD_SET_LINK_WINDOW: - if ((new_value >= TIPC_MIN_LINK_WIN) && - (new_value <= TIPC_MAX_LINK_WIN)) { - tipc_link_set_queue_limits(l_ptr, new_value); - res = 0; - } - break; - } - tipc_node_unlock(node); - + res = link_cmd_set_value(args->name, new_value, cmd); read_unlock_bh(&tipc_net_lock); if (res) return tipc_cfg_reply_error_string("cannot change link setting"); @@ -2778,7 +2898,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space * @l_ptr: pointer to link */ -static void link_reset_statistics(struct link *l_ptr) +static void link_reset_statistics(struct tipc_link *l_ptr) { memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); l_ptr->stats.sent_info = l_ptr->next_out_no; @@ -2788,7 +2908,7 @@ static void link_reset_statistics(struct link *l_ptr) struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space) { char *link_name; - struct link *l_ptr; + struct tipc_link *l_ptr; struct tipc_node *node; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) @@ -2836,7 +2956,7 @@ static u32 percent(u32 count, u32 total) static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) { struct print_buf pb; - struct link *l_ptr; + struct tipc_link *l_ptr; struct tipc_node *node; char *status; u32 profile_total = 0; @@ -2958,7 +3078,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s u32 tipc_link_get_max_pkt(u32 dest, u32 selector) { struct tipc_node *n_ptr; - struct link *l_ptr; + struct tipc_link *l_ptr; u32 res = MAX_PKT_DEFAULT; if (dest == tipc_own_addr) @@ -2977,7 +3097,7 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector) return res; } -static void link_print(struct link *l_ptr, const char *str) +static void link_print(struct tipc_link *l_ptr, const char *str) { char print_area[256]; struct print_buf pb; @@ -2997,13 +3117,12 @@ static void link_print(struct link *l_ptr, const char *str) tipc_printf(buf, "NXI(%u):", mod(l_ptr->next_in_no)); tipc_printf(buf, "SQUE"); if (l_ptr->first_out) { - tipc_printf(buf, "[%u..", msg_seqno(buf_msg(l_ptr->first_out))); + tipc_printf(buf, "[%u..", buf_seqno(l_ptr->first_out)); if (l_ptr->next_out) - tipc_printf(buf, "%u..", - msg_seqno(buf_msg(l_ptr->next_out))); - tipc_printf(buf, "%u]", msg_seqno(buf_msg(l_ptr->last_out))); - if ((mod(msg_seqno(buf_msg(l_ptr->last_out)) - - msg_seqno(buf_msg(l_ptr->first_out))) + tipc_printf(buf, "%u..", buf_seqno(l_ptr->next_out)); + tipc_printf(buf, "%u]", buf_seqno(l_ptr->last_out)); + if ((mod(buf_seqno(l_ptr->last_out) - + buf_seqno(l_ptr->first_out)) != (l_ptr->out_queue_size - 1)) || (l_ptr->last_out->next != NULL)) { tipc_printf(buf, "\nSend queue inconsistency\n"); @@ -3015,8 +3134,8 @@ static void link_print(struct link *l_ptr, const char *str) tipc_printf(buf, "[]"); tipc_printf(buf, "SQSIZ(%u)", l_ptr->out_queue_size); if (l_ptr->oldest_deferred_in) { - u32 o = msg_seqno(buf_msg(l_ptr->oldest_deferred_in)); - u32 n = msg_seqno(buf_msg(l_ptr->newest_deferred_in)); + u32 o = buf_seqno(l_ptr->oldest_deferred_in); + u32 n = buf_seqno(l_ptr->newest_deferred_in); tipc_printf(buf, ":RQUE[%u..%u]", o, n); if (l_ptr->deferred_inqueue_sz != mod((n + 1) - o)) { tipc_printf(buf, ":RQSIZ(%u)", diff --git a/net/tipc/link.h b/net/tipc/link.h index 74fbecab1ea0..73c18c140e1d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -45,6 +45,12 @@ #define PUSH_FINISHED 2 /* + * Out-of-range value for link sequence numbers + */ + +#define INVALID_LINK_SEQ 0x10000 + +/* * Link states */ @@ -61,7 +67,7 @@ #define MAX_PKT_DEFAULT 1500 /** - * struct link - TIPC link data structure + * struct tipc_link - TIPC link data structure * @addr: network address of link's peer node * @name: link name character string * @media_addr: media address to use when sending messages over link @@ -109,7 +115,7 @@ * @stats: collects statistics regarding link activity */ -struct link { +struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; struct tipc_media_addr media_addr; @@ -207,23 +213,24 @@ struct link { struct tipc_port; -struct link *tipc_link_create(struct tipc_node *n_ptr, +struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); -void tipc_link_delete(struct link *l_ptr); -void tipc_link_changeover(struct link *l_ptr); -void tipc_link_send_duplicate(struct link *l_ptr, struct link *dest); -void tipc_link_reset_fragments(struct link *l_ptr); -int tipc_link_is_up(struct link *l_ptr); -int tipc_link_is_active(struct link *l_ptr); -u32 tipc_link_push_packet(struct link *l_ptr); -void tipc_link_stop(struct link *l_ptr); +void tipc_link_delete(struct tipc_link *l_ptr); +void tipc_link_changeover(struct tipc_link *l_ptr); +void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *dest); +void tipc_link_reset_fragments(struct tipc_link *l_ptr); +int tipc_link_is_up(struct tipc_link *l_ptr); +int tipc_link_is_active(struct tipc_link *l_ptr); +u32 tipc_link_push_packet(struct tipc_link *l_ptr); +void tipc_link_stop(struct tipc_link *l_ptr); struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd); struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); -void tipc_link_reset(struct link *l_ptr); +void tipc_link_reset(struct tipc_link *l_ptr); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); -int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf); +void tipc_link_send_names(struct list_head *message_list, u32 dest); +int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, @@ -234,19 +241,26 @@ void tipc_link_recv_bundle(struct sk_buff *buf); int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, struct tipc_msg **msg); -void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int prob, u32 gap, - u32 tolerance, u32 priority, u32 acked_mtu); -void tipc_link_push_queue(struct link *l_ptr); +void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, int prob, + u32 gap, u32 tolerance, u32 priority, + u32 acked_mtu); +void tipc_link_push_queue(struct tipc_link *l_ptr); u32 tipc_link_defer_pkt(struct sk_buff **head, struct sk_buff **tail, struct sk_buff *buf); -void tipc_link_wakeup_ports(struct link *l_ptr, int all); -void tipc_link_set_queue_limits(struct link *l_ptr, u32 window); -void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *start, u32 retransmits); +void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all); +void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); +void tipc_link_retransmit(struct tipc_link *l_ptr, + struct sk_buff *start, u32 retransmits); /* * Link sequence number manipulation routines (uses modulo 2**16 arithmetic) */ +static inline u32 buf_seqno(struct sk_buff *buf) +{ + return msg_seqno(buf_msg(buf)); +} + static inline u32 mod(u32 x) { return x & 0xffffu; @@ -281,32 +295,32 @@ static inline u32 lesser(u32 left, u32 right) * Link status checking routines */ -static inline int link_working_working(struct link *l_ptr) +static inline int link_working_working(struct tipc_link *l_ptr) { return l_ptr->state == WORKING_WORKING; } -static inline int link_working_unknown(struct link *l_ptr) +static inline int link_working_unknown(struct tipc_link *l_ptr) { return l_ptr->state == WORKING_UNKNOWN; } -static inline int link_reset_unknown(struct link *l_ptr) +static inline int link_reset_unknown(struct tipc_link *l_ptr) { return l_ptr->state == RESET_UNKNOWN; } -static inline int link_reset_reset(struct link *l_ptr) +static inline int link_reset_reset(struct tipc_link *l_ptr) { return l_ptr->state == RESET_RESET; } -static inline int link_blocked(struct link *l_ptr) +static inline int link_blocked(struct tipc_link *l_ptr) { return l_ptr->exp_msg_count || l_ptr->blocked; } -static inline int link_congested(struct link *l_ptr) +static inline int link_congested(struct tipc_link *l_ptr) { return l_ptr->out_queue_size >= l_ptr->queue_limit[0]; } diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 83d50967910c..3e4d3e29be61 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -333,11 +333,14 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) } if (msg_user(msg) == LINK_CONFIG) { - u32 *raw = (u32 *)msg; - struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5]; + struct tipc_media_addr orig; + tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg)); tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg)); - tipc_media_addr_printf(buf, orig); + memcpy(orig.value, msg_media_addr(msg), sizeof(orig.value)); + orig.media_id = 0; + orig.broadcast = 0; + tipc_media_addr_printf(buf, &orig); } if (msg_user(msg) == BCAST_PROTOCOL) { tipc_printf(buf, "BCNACK:AFTER(%u):", msg_bcgap_after(msg)); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d93178f2e852..7b0cda167107 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -78,6 +78,8 @@ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) +#define TIPC_MEDIA_ADDR_OFFSET 5 + struct tipc_msg { __be32 hdr[15]; @@ -682,6 +684,10 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) msg_set_bits(m, 5, 12, 0x1, r); } +static inline char *msg_media_addr(struct tipc_msg *m) +{ + return (char *)&m->hdr[TIPC_MEDIA_ADDR_OFFSET]; +} /* * Word 9 @@ -734,14 +740,4 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, unsigned int total_len, int max_size, int usrmem, struct sk_buff **buf); -static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a) -{ - memcpy(&((int *)m)[5], a, sizeof(*a)); -} - -static inline void msg_get_media_addr(struct tipc_msg *m, struct tipc_media_addr *a) -{ - memcpy(a, &((int *)m)[5], sizeof(*a)); -} - #endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index cd356e504332..98ebb37f1808 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -173,18 +173,40 @@ void tipc_named_withdraw(struct publication *publ) * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(unsigned long node) +void tipc_named_node_up(unsigned long nodearg) { + struct tipc_node *n_ptr; + struct tipc_link *l_ptr; struct publication *publ; struct distr_item *item = NULL; struct sk_buff *buf = NULL; + struct list_head message_list; + u32 node = (u32)nodearg; u32 left = 0; u32 rest; - u32 max_item_buf; + u32 max_item_buf = 0; + + /* compute maximum amount of publication data to send per message */ + + read_lock_bh(&tipc_net_lock); + n_ptr = tipc_node_find(node); + if (n_ptr) { + tipc_node_lock(n_ptr); + l_ptr = n_ptr->active_links[0]; + if (l_ptr) + max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) / + ITEM_SIZE) * ITEM_SIZE; + tipc_node_unlock(n_ptr); + } + read_unlock_bh(&tipc_net_lock); + if (!max_item_buf) + return; + + /* create list of publication messages, then send them as a unit */ + + INIT_LIST_HEAD(&message_list); read_lock_bh(&tipc_nametbl_lock); - max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE; - max_item_buf *= ITEM_SIZE; rest = publ_cnt * ITEM_SIZE; list_for_each_entry(publ, &publ_root, local_list) { @@ -202,13 +224,14 @@ void tipc_named_node_up(unsigned long node) item++; left -= ITEM_SIZE; if (!left) { - msg_set_link_selector(buf_msg(buf), node); - tipc_link_send(buf, node, node); + list_add_tail((struct list_head *)buf, &message_list); buf = NULL; } } exit: read_unlock_bh(&tipc_nametbl_lock); + + tipc_link_send_names(&message_list, (u32)node); } /** @@ -299,10 +322,9 @@ void tipc_named_recv(struct sk_buff *buf) /** * tipc_named_reinit - re-initialize local publication list * - * This routine is called whenever TIPC networking is (re)enabled. + * This routine is called whenever TIPC networking is enabled. * All existing publications by this node that have "cluster" or "zone" scope - * are updated to reflect the node's current network address. - * (If the node's address is unchanged, the update loop terminates immediately.) + * are updated to reflect the node's new network address. */ void tipc_named_reinit(void) @@ -310,10 +332,9 @@ void tipc_named_reinit(void) struct publication *publ; write_lock_bh(&tipc_nametbl_lock); - list_for_each_entry(publ, &publ_root, local_list) { - if (publ->node == tipc_own_addr) - break; + + list_for_each_entry(publ, &publ_root, local_list) publ->node = tipc_own_addr; - } + write_unlock_bh(&tipc_nametbl_lock); } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 46e6b6c2ecc9..89eb5621ebba 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -251,8 +251,8 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, u32 type, u32 lower, u32 upper, u32 scope, u32 node, u32 port, u32 key) { - struct subscription *s; - struct subscription *st; + struct tipc_subscription *s; + struct tipc_subscription *st; struct publication *publ; struct sub_seq *sseq; struct name_info *info; @@ -381,7 +381,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); struct name_info *info; struct sub_seq *free; - struct subscription *s, *st; + struct tipc_subscription *s, *st; int removed_subseq = 0; if (!sseq) @@ -448,7 +448,8 @@ found: * sequence overlapping with the requested sequence */ -static void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s) +static void tipc_nameseq_subscribe(struct name_seq *nseq, + struct tipc_subscription *s) { struct sub_seq *sseq = nseq->sseqs; @@ -625,7 +626,7 @@ not_found: */ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct port_list *dports) + struct tipc_port_list *dports) { struct name_seq *seq; struct sub_seq *sseq; @@ -739,7 +740,7 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) * tipc_nametbl_subscribe - add a subscription object to the name table */ -void tipc_nametbl_subscribe(struct subscription *s) +void tipc_nametbl_subscribe(struct tipc_subscription *s) { u32 type = s->seq.type; struct name_seq *seq; @@ -763,7 +764,7 @@ void tipc_nametbl_subscribe(struct subscription *s) * tipc_nametbl_unsubscribe - remove a subscription object from name table */ -void tipc_nametbl_unsubscribe(struct subscription *s) +void tipc_nametbl_unsubscribe(struct tipc_subscription *s) { struct name_seq *seq; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 62d77e5e902e..8086b42f92ad 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -39,8 +39,8 @@ #include "node_subscr.h" -struct subscription; -struct port_list; +struct tipc_subscription; +struct tipc_port_list; /* * TIPC name types reserved for internal TIPC use (both current and planned) @@ -90,7 +90,7 @@ extern rwlock_t tipc_nametbl_lock; struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct port_list *dports); + struct tipc_port_list *dports); int tipc_nametbl_publish_rsv(u32 ref, unsigned int scope, struct tipc_name_seq const *seq); struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, @@ -100,8 +100,8 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, u32 scope, u32 node, u32 ref, u32 key); struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, u32 ref, u32 key); -void tipc_nametbl_subscribe(struct subscription *s); -void tipc_nametbl_unsubscribe(struct subscription *s); +void tipc_nametbl_subscribe(struct tipc_subscription *s); +void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(void); void tipc_nametbl_stop(void); diff --git a/net/tipc/net.c b/net/tipc/net.c index 68b3dd637291..61afee7e8291 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -141,17 +141,6 @@ void tipc_net_route_msg(struct sk_buff *buf) return; msg = buf_msg(buf); - msg_incr_reroute_cnt(msg); - if (msg_reroute_cnt(msg) > 6) { - if (msg_errcode(msg)) { - buf_discard(buf); - } else { - tipc_reject_msg(buf, msg_destport(msg) ? - TIPC_ERR_NO_PORT : TIPC_ERR_NO_NAME); - } - return; - } - /* Handle message for this node */ dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg); if (tipc_in_scope(dnode, tipc_own_addr)) { @@ -185,7 +174,6 @@ void tipc_net_route_msg(struct sk_buff *buf) int tipc_net_start(u32 addr) { char addr_string[16]; - int res; if (tipc_mode != TIPC_NODE_MODE) return -ENOPROTOOPT; @@ -198,9 +186,7 @@ int tipc_net_start(u32 addr) tipc_named_reinit(); tipc_port_reinit(); - res = tipc_bclink_init(); - if (res) - return res; + tipc_bclink_init(); tipc_k_signal((Handler)tipc_subscr_start, 0); tipc_k_signal((Handler)tipc_cfg_init, 0); @@ -218,8 +204,8 @@ void tipc_net_stop(void) if (tipc_mode != TIPC_NET_MODE) return; write_lock_bh(&tipc_net_lock); - tipc_bearer_stop(); tipc_mode = TIPC_NODE_MODE; + tipc_bearer_stop(); tipc_bclink_stop(); list_for_each_entry_safe(node, t_node, &tipc_node_list, list) tipc_node_delete(node); diff --git a/net/tipc/node.c b/net/tipc/node.c index 2d106ef4fa4c..6b226faad89f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -112,6 +112,7 @@ struct tipc_node *tipc_node_create(u32 addr) break; } list_add_tail(&n_ptr->list, &temp_node->list); + n_ptr->block_setup = WAIT_PEER_DOWN; tipc_num_nodes++; @@ -135,9 +136,9 @@ void tipc_node_delete(struct tipc_node *n_ptr) * Link becomes active (alone or shared) or standby, depending on its priority. */ -void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr) +void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - struct link **active = &n_ptr->active_links[0]; + struct tipc_link **active = &n_ptr->active_links[0]; n_ptr->working_links++; @@ -170,14 +171,14 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr) static void node_select_active_links(struct tipc_node *n_ptr) { - struct link **active = &n_ptr->active_links[0]; + struct tipc_link **active = &n_ptr->active_links[0]; u32 i; u32 highest_prio = 0; active[0] = active[1] = NULL; for (i = 0; i < MAX_BEARERS; i++) { - struct link *l_ptr = n_ptr->links[i]; + struct tipc_link *l_ptr = n_ptr->links[i]; if (!l_ptr || !tipc_link_is_up(l_ptr) || (l_ptr->priority < highest_prio)) @@ -196,9 +197,9 @@ static void node_select_active_links(struct tipc_node *n_ptr) * tipc_node_link_down - handle loss of link */ -void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr) +void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { - struct link **active; + struct tipc_link **active; n_ptr->working_links--; @@ -238,14 +239,14 @@ int tipc_node_is_up(struct tipc_node *n_ptr) return tipc_node_active_links(n_ptr); } -void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr) +void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; atomic_inc(&tipc_num_links); n_ptr->link_cnt++; } -void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) +void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { n_ptr->links[l_ptr->b_ptr->identity] = NULL; atomic_dec(&tipc_num_links); @@ -306,13 +307,13 @@ static void node_established_contact(struct tipc_node *n_ptr) n_ptr->bclink.acked = tipc_bclink_get_last_sent(); if (n_ptr->bclink.supported) { - tipc_nmap_add(&tipc_bcast_nmap, n_ptr->addr); + tipc_bclink_add_node(n_ptr->addr); if (n_ptr->addr < tipc_own_addr) tipc_own_tag++; } } -static void node_cleanup_finished(unsigned long node_addr) +static void node_name_purge_complete(unsigned long node_addr) { struct tipc_node *n_ptr; @@ -320,7 +321,7 @@ static void node_cleanup_finished(unsigned long node_addr) n_ptr = tipc_node_find(node_addr); if (n_ptr) { tipc_node_lock(n_ptr); - n_ptr->cleanup_required = 0; + n_ptr->block_setup &= ~WAIT_NAMES_GONE; tipc_node_unlock(n_ptr); } read_unlock_bh(&tipc_net_lock); @@ -331,32 +332,35 @@ static void node_lost_contact(struct tipc_node *n_ptr) char addr_string[16]; u32 i; - /* Clean up broadcast reception remains */ - n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0; - while (n_ptr->bclink.deferred_head) { - struct sk_buff *buf = n_ptr->bclink.deferred_head; - n_ptr->bclink.deferred_head = buf->next; - buf_discard(buf); - } - if (n_ptr->bclink.defragm) { - buf_discard(n_ptr->bclink.defragm); - n_ptr->bclink.defragm = NULL; - } + info("Lost contact with %s\n", + tipc_addr_string_fill(addr_string, n_ptr->addr)); + + /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.supported) { - tipc_bclink_acknowledge(n_ptr, - mod(n_ptr->bclink.acked + 10000)); - tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr); + n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0; + while (n_ptr->bclink.deferred_head) { + struct sk_buff *buf = n_ptr->bclink.deferred_head; + n_ptr->bclink.deferred_head = buf->next; + buf_discard(buf); + } + + if (n_ptr->bclink.defragm) { + buf_discard(n_ptr->bclink.defragm); + n_ptr->bclink.defragm = NULL; + } + + tipc_bclink_remove_node(n_ptr->addr); + tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); if (n_ptr->addr < tipc_own_addr) tipc_own_tag--; - } - info("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + n_ptr->bclink.supported = 0; + } /* Abort link changeover */ for (i = 0; i < MAX_BEARERS; i++) { - struct link *l_ptr = n_ptr->links[i]; + struct tipc_link *l_ptr = n_ptr->links[i]; if (!l_ptr) continue; l_ptr->reset_checkpoint = l_ptr->next_in_no; @@ -367,10 +371,10 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Notify subscribers */ tipc_nodesub_notify(n_ptr); - /* Prevent re-contact with node until all cleanup is done */ + /* Prevent re-contact with node until cleanup is done */ - n_ptr->cleanup_required = 1; - tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr); + n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE; + tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr); } struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) diff --git a/net/tipc/node.h b/net/tipc/node.h index 5c61afc7a0b9..0b1c5f8b6996 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,6 +42,12 @@ #include "net.h" #include "bearer.h" +/* Flags used to block (re)establishment of contact with a neighboring node */ + +#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */ +#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */ +#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */ + /** * struct tipc_node - TIPC node structure * @addr: network address of node @@ -52,7 +58,7 @@ * @active_links: pointers to active links to node * @links: pointers to all links to node * @working_links: number of working links to node (both active and standby) - * @cleanup_required: non-zero if cleaning up after a prior loss of contact + * @block_setup: bit mask of conditions preventing link establishment to node * @link_cnt: number of links to node * @permit_changeover: non-zero if node has redundant links to this system * @bclink: broadcast-related info @@ -73,11 +79,11 @@ struct tipc_node { struct hlist_node hash; struct list_head list; struct list_head nsub; - struct link *active_links[2]; - struct link *links[MAX_BEARERS]; + struct tipc_link *active_links[2]; + struct tipc_link *links[MAX_BEARERS]; int link_cnt; int working_links; - int cleanup_required; + int block_setup; int permit_changeover; struct { int supported; @@ -111,10 +117,10 @@ extern u32 tipc_own_tag; struct tipc_node *tipc_node_find(u32 addr); struct tipc_node *tipc_node_create(u32 addr); void tipc_node_delete(struct tipc_node *n_ptr); -void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr); -void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr); -void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr); -void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr); +void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); +void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); +void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr); +void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr); int tipc_node_active_links(struct tipc_node *n_ptr); int tipc_node_redundant_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); diff --git a/net/tipc/port.c b/net/tipc/port.c index 54d812a5a4d9..d91efc69e6f9 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -80,7 +80,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, struct tipc_msg *hdr; struct sk_buff *buf; struct sk_buff *ibuf = NULL; - struct port_list dports = {0, NULL, }; + struct tipc_port_list dports = {0, NULL, }; struct tipc_port *oport = tipc_port_deref(ref); int ext_targets; int res; @@ -142,11 +142,11 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, * If there is no port list, perform a lookup to create one */ -void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp) +void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp) { struct tipc_msg *msg; - struct port_list dports = {0, NULL, }; - struct port_list *item = dp; + struct tipc_port_list dports = {0, NULL, }; + struct tipc_port_list *item = dp; int cnt = 0; msg = buf_msg(buf); diff --git a/net/tipc/port.h b/net/tipc/port.h index b9aa34195aec..f751807e2a91 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -151,7 +151,7 @@ struct tipc_port { }; extern spinlock_t tipc_port_list_lock; -struct port_list; +struct tipc_port_list; /* * TIPC port manipulation routines @@ -228,7 +228,7 @@ int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, unsigned int total_len, int err); struct sk_buff *tipc_port_get_ports(void); void tipc_port_recv_proto_msg(struct sk_buff *buf); -void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp); +void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp); void tipc_port_reinit(void); /** diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 83116892528b..9e37b7812c3c 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -110,8 +110,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start) /* allocate table & mark all entries as uninitialized */ - table = __vmalloc(actual_size * sizeof(struct reference), - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + table = vzalloc(actual_size * sizeof(struct reference)); if (table == NULL) return -ENOMEM; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index adb2eff4a102..e2f7c5d370ba 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -34,6 +34,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/export.h> #include <net/sock.h> #include "core.h" @@ -49,7 +50,7 @@ struct tipc_sock { struct sock sk; struct tipc_port *p; struct tipc_portid peer_name; - long conn_timeout; + unsigned int conn_timeout; }; #define tipc_sk(sk) ((struct tipc_sock *)(sk)) @@ -184,9 +185,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, /* Validate arguments */ - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - if (unlikely(protocol != 0)) return -EPROTONOSUPPORT; @@ -231,7 +229,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; tipc_sk(sk)->p = tp_ptr; - tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); + tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; spin_unlock_bh(tp_ptr->lock); @@ -525,6 +523,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; int needs_conn; + long timeout_val; int res = -EINVAL; if (unlikely(!dest)) @@ -564,6 +563,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, reject_rx_queue(sk); } + timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + do { if (dest->addrtype == TIPC_ADDR_NAME) { res = dest_name_check(dest, m); @@ -600,16 +601,14 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, sock->state = SS_CONNECTING; break; } - if (m->msg_flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout_val <= 0L) { + res = timeout_val ? timeout_val : -EWOULDBLOCK; break; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - !tport->congested); + timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), + !tport->congested, timeout_val); lock_sock(sk); - if (res) - break; } while (1); exit: @@ -636,6 +635,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; + long timeout_val; int res; /* Handle implied connection establishment */ @@ -650,6 +650,8 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, if (iocb) lock_sock(sk); + timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + do { if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_DISCONNECTING) @@ -663,16 +665,14 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, total_len); if (likely(res != -ELINKCONG)) break; - if (m->msg_flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout_val <= 0L) { + res = timeout_val ? timeout_val : -EWOULDBLOCK; break; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!tport->congested || !tport->connected)); + timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), + (!tport->congested || !tport->connected), timeout_val); lock_sock(sk); - if (res) - break; } while (1); if (iocb) @@ -1369,7 +1369,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, struct msghdr m = {NULL,}; struct sk_buff *buf; struct tipc_msg *msg; - long timeout; + unsigned int timeout; int res; lock_sock(sk); @@ -1434,7 +1434,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, res = wait_event_interruptible_timeout(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state != SS_CONNECTING)), - timeout ? timeout : MAX_SCHEDULE_TIMEOUT); + timeout ? (long)msecs_to_jiffies(timeout) + : MAX_SCHEDULE_TIMEOUT); lock_sock(sk); if (res > 0) { @@ -1480,9 +1481,7 @@ static int listen(struct socket *sock, int len) lock_sock(sk); - if (sock->state == SS_READY) - res = -EOPNOTSUPP; - else if (sock->state != SS_UNCONNECTED) + if (sock->state != SS_UNCONNECTED) res = -EINVAL; else { sock->state = SS_LISTENING; @@ -1510,10 +1509,6 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) lock_sock(sk); - if (sock->state == SS_READY) { - res = -EOPNOTSUPP; - goto exit; - } if (sock->state != SS_LISTENING) { res = -EINVAL; goto exit; @@ -1696,7 +1691,7 @@ static int setsockopt(struct socket *sock, res = tipc_set_portunreturnable(tport->ref, value); break; case TIPC_CONN_TIMEOUT: - tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value); + tipc_sk(sk)->conn_timeout = value; /* no need to set "res", since already 0 at this point */ break; default: @@ -1752,7 +1747,7 @@ static int getsockopt(struct socket *sock, res = tipc_portunreturnable(tport->ref, &value); break; case TIPC_CONN_TIMEOUT: - value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout); + value = tipc_sk(sk)->conn_timeout; /* no need to set "res", since already 0 at this point */ break; case TIPC_NODE_RECVQ_DEPTH: @@ -1790,11 +1785,11 @@ static const struct proto_ops msg_ops = { .bind = bind, .connect = connect, .socketpair = sock_no_socketpair, - .accept = accept, + .accept = sock_no_accept, .getname = get_name, .poll = poll, .ioctl = sock_no_ioctl, - .listen = listen, + .listen = sock_no_listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 6cf726863485..8c49566da8f3 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -40,14 +40,14 @@ #include "subscr.h" /** - * struct subscriber - TIPC network topology subscriber + * struct tipc_subscriber - TIPC network topology subscriber * @port_ref: object reference to server port connecting to subscriber * @lock: pointer to spinlock controlling access to subscriber's server port * @subscriber_list: adjacent subscribers in top. server's list of subscribers * @subscription_list: list of subscription objects for this subscriber */ -struct subscriber { +struct tipc_subscriber { u32 port_ref; spinlock_t *lock; struct list_head subscriber_list; @@ -92,7 +92,7 @@ static u32 htohl(u32 in, int swap) * try to take the lock if the message is rejected and returned! */ -static void subscr_send_event(struct subscription *sub, +static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, @@ -118,7 +118,7 @@ static void subscr_send_event(struct subscription *sub, * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscr_overlap(struct subscription *sub, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper) @@ -138,7 +138,7 @@ int tipc_subscr_overlap(struct subscription *sub, * Protected by nameseq.lock in name_table.c */ -void tipc_subscr_report_overlap(struct subscription *sub, +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, @@ -151,14 +151,14 @@ void tipc_subscr_report_overlap(struct subscription *sub, if (!must && !(sub->filter & TIPC_SUB_PORTS)) return; - sub->event_cb(sub, found_lower, found_upper, event, port_ref, node); + subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } /** * subscr_timeout - subscription timeout has occurred */ -static void subscr_timeout(struct subscription *sub) +static void subscr_timeout(struct tipc_subscription *sub) { struct tipc_port *server_port; @@ -205,7 +205,7 @@ static void subscr_timeout(struct subscription *sub) * Called with subscriber port locked. */ -static void subscr_del(struct subscription *sub) +static void subscr_del(struct tipc_subscription *sub) { tipc_nametbl_unsubscribe(sub); list_del(&sub->subscription_list); @@ -224,11 +224,11 @@ static void subscr_del(struct subscription *sub) * simply wait for it to be released, then claim it.) */ -static void subscr_terminate(struct subscriber *subscriber) +static void subscr_terminate(struct tipc_subscriber *subscriber) { u32 port_ref; - struct subscription *sub; - struct subscription *sub_temp; + struct tipc_subscription *sub; + struct tipc_subscription *sub_temp; /* Invalidate subscriber reference */ @@ -278,10 +278,10 @@ static void subscr_terminate(struct subscriber *subscriber) */ static void subscr_cancel(struct tipc_subscr *s, - struct subscriber *subscriber) + struct tipc_subscriber *subscriber) { - struct subscription *sub; - struct subscription *sub_temp; + struct tipc_subscription *sub; + struct tipc_subscription *sub_temp; int found = 0; /* Find first matching subscription, exit if not found */ @@ -314,10 +314,10 @@ static void subscr_cancel(struct tipc_subscr *s, * Called with subscriber port locked. */ -static struct subscription *subscr_subscribe(struct tipc_subscr *s, - struct subscriber *subscriber) +static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, + struct tipc_subscriber *subscriber) { - struct subscription *sub; + struct tipc_subscription *sub; int swap; /* Determine subscriber's endianness */ @@ -365,7 +365,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, subscr_terminate(subscriber); return NULL; } - sub->event_cb = subscr_send_event; INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); sub->server_ref = subscriber->port_ref; @@ -394,7 +393,7 @@ static void subscr_conn_shutdown_event(void *usr_handle, unsigned int size, int reason) { - struct subscriber *subscriber = usr_handle; + struct tipc_subscriber *subscriber = usr_handle; spinlock_t *subscriber_lock; if (tipc_port_lock(port_ref) == NULL) @@ -417,9 +416,9 @@ static void subscr_conn_msg_event(void *usr_handle, const unchar *data, u32 size) { - struct subscriber *subscriber = usr_handle; + struct tipc_subscriber *subscriber = usr_handle; spinlock_t *subscriber_lock; - struct subscription *sub; + struct tipc_subscription *sub; /* * Lock subscriber's server port (& make a local copy of lock pointer, @@ -472,12 +471,12 @@ static void subscr_named_msg_event(void *usr_handle, struct tipc_portid const *orig, struct tipc_name_seq const *dest) { - struct subscriber *subscriber; + struct tipc_subscriber *subscriber; u32 server_port_ref; /* Create subscriber object */ - subscriber = kzalloc(sizeof(struct subscriber), GFP_ATOMIC); + subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); if (subscriber == NULL) { warn("Subscriber rejected, no memory\n"); return; @@ -569,8 +568,8 @@ failed: void tipc_subscr_stop(void) { - struct subscriber *subscriber; - struct subscriber *subscriber_temp; + struct tipc_subscriber *subscriber; + struct tipc_subscriber *subscriber_temp; spinlock_t *subscriber_lock; if (topsrv.setup_port) { diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 45d89bf4d202..ef6529c8456f 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -37,18 +37,13 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H -struct subscription; - -typedef void (*tipc_subscr_event) (struct subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port_ref, u32 node); +struct tipc_subscription; /** - * struct subscription - TIPC network topology subscription object + * struct tipc_subscription - TIPC network topology subscription object * @seq: name sequence associated with subscription * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription - * @event_cb: routine invoked when a subscription event is detected * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list * @subscription_list: adjacent subscriptions in subscriber's subscription list @@ -57,11 +52,10 @@ typedef void (*tipc_subscr_event) (struct subscription *sub, * @evt: template for events generated by subscription */ -struct subscription { +struct tipc_subscription { struct tipc_name_seq seq; u32 timeout; u32 filter; - tipc_subscr_event event_cb; struct timer_list timer; struct list_head nameseq_list; struct list_head subscription_list; @@ -70,11 +64,11 @@ struct subscription { struct tipc_event evt; }; -int tipc_subscr_overlap(struct subscription *sub, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper); -void tipc_subscr_report_overlap(struct subscription *sub, +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, diff --git a/net/unix/Kconfig b/net/unix/Kconfig index 5a69733bcdad..8b31ab85d050 100644 --- a/net/unix/Kconfig +++ b/net/unix/Kconfig @@ -19,3 +19,10 @@ config UNIX Say Y unless you know what you are doing. +config UNIX_DIAG + tristate "UNIX: socket monitoring interface" + depends on UNIX + default n + ---help--- + Support for UNIX socket monitoring interface used by the ss tool. + If unsure, say Y. diff --git a/net/unix/Makefile b/net/unix/Makefile index b852a2bde9a8..b663c607b1c6 100644 --- a/net/unix/Makefile +++ b/net/unix/Makefile @@ -6,3 +6,6 @@ obj-$(CONFIG_UNIX) += unix.o unix-y := af_unix.o garbage.o unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o + +obj-$(CONFIG_UNIX_DIAG) += unix_diag.o +unix_diag-y := diag.o diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ec68e1c05b85..aad8fb699989 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -115,8 +115,10 @@ #include <net/checksum.h> #include <linux/security.h> -static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; -static DEFINE_SPINLOCK(unix_table_lock); +struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +EXPORT_SYMBOL_GPL(unix_socket_table); +DEFINE_SPINLOCK(unix_table_lock); +EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) @@ -172,7 +174,7 @@ static inline int unix_recvq_full(struct sock const *sk) return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } -static struct sock *unix_peer_get(struct sock *s) +struct sock *unix_peer_get(struct sock *s) { struct sock *peer; @@ -183,6 +185,7 @@ static struct sock *unix_peer_get(struct sock *s) unix_state_unlock(s); return peer; } +EXPORT_SYMBOL_GPL(unix_peer_get); static inline void unix_release_addr(struct unix_address *addr) { @@ -847,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - unsigned int mode; + umode_t mode; err = 0; /* * Get the parent directory, calculate the hash for last @@ -1381,8 +1384,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; + UNIXCB(skb).pid = get_pid(scm->pid); - UNIXCB(skb).cred = get_cred(scm->cred); + if (scm->cred) + UNIXCB(skb).cred = get_cred(scm->cred); UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1392,6 +1397,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen } /* + * Some apps rely on write() giving SCM_CREDENTIALS + * We include credentials if source or destination socket + * asserted SOCK_PASSCRED. + */ +static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, + const struct sock *other) +{ + if (UNIXCB(skb).cred) + return; + if (test_bit(SOCK_PASSCRED, &sock->flags) || + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + UNIXCB(skb).pid = get_pid(task_tgid(current)); + UNIXCB(skb).cred = get_current_cred(); + } +} + +/* * Send AF_UNIX data. */ @@ -1538,6 +1561,7 @@ restart: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); + maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; @@ -1652,6 +1676,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, (other->sk_shutdown & RCV_SHUTDOWN)) goto pipe_err_free; + maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; @@ -1935,6 +1960,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if ((UNIXCB(skb).pid != siocb->scm->pid) || (UNIXCB(skb).cred != siocb->scm->cred)) { skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); break; } } else { @@ -1952,6 +1978,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, chunk = min_t(unsigned int, skb->len, size); if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); if (copied == 0) copied = -EFAULT; break; @@ -1969,6 +1996,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, /* put the skb back if we didn't use it up.. */ if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); break; } @@ -1984,6 +2012,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, /* put message back and return */ skb_queue_head(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, skb->len); break; } } while (size); @@ -2036,6 +2065,36 @@ static int unix_shutdown(struct socket *sock, int mode) return 0; } +long unix_inq_len(struct sock *sk) +{ + struct sk_buff *skb; + long amount = 0; + + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + spin_lock(&sk->sk_receive_queue.lock); + if (sk->sk_type == SOCK_STREAM || + sk->sk_type == SOCK_SEQPACKET) { + skb_queue_walk(&sk->sk_receive_queue, skb) + amount += skb->len; + } else { + skb = skb_peek(&sk->sk_receive_queue); + if (skb) + amount = skb->len; + } + spin_unlock(&sk->sk_receive_queue.lock); + + return amount; +} +EXPORT_SYMBOL_GPL(unix_inq_len); + +long unix_outq_len(struct sock *sk) +{ + return sk_wmem_alloc_get(sk); +} +EXPORT_SYMBOL_GPL(unix_outq_len); + static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; @@ -2044,33 +2103,16 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: - amount = sk_wmem_alloc_get(sk); + amount = unix_outq_len(sk); err = put_user(amount, (int __user *)arg); break; case SIOCINQ: - { - struct sk_buff *skb; - - if (sk->sk_state == TCP_LISTEN) { - err = -EINVAL; - break; - } - - spin_lock(&sk->sk_receive_queue.lock); - if (sk->sk_type == SOCK_STREAM || - sk->sk_type == SOCK_SEQPACKET) { - skb_queue_walk(&sk->sk_receive_queue, skb) - amount += skb->len; - } else { - skb = skb_peek(&sk->sk_receive_queue); - if (skb) - amount = skb->len; - } - spin_unlock(&sk->sk_receive_queue.lock); + amount = unix_inq_len(sk); + if (amount < 0) + err = amount; + else err = put_user(amount, (int __user *)arg); - break; - } - + break; default: err = -ENOIOCTLCMD; break; diff --git a/net/unix/diag.c b/net/unix/diag.c new file mode 100644 index 000000000000..6b7697fd911b --- /dev/null +++ b/net/unix/diag.c @@ -0,0 +1,329 @@ +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/sock_diag.h> +#include <linux/unix_diag.h> +#include <linux/skbuff.h> +#include <linux/module.h> +#include <net/netlink.h> +#include <net/af_unix.h> +#include <net/tcp_states.h> + +#define UNIX_DIAG_PUT(skb, attrtype, attrlen) \ + RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) + +static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) +{ + struct unix_address *addr = unix_sk(sk)->addr; + char *s; + + if (addr) { + s = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short)); + memcpy(s, addr->name->sun_path, addr->len - sizeof(short)); + } + + return 0; + +rtattr_failure: + return -EMSGSIZE; +} + +static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb) +{ + struct dentry *dentry = unix_sk(sk)->dentry; + struct unix_diag_vfs *uv; + + if (dentry) { + uv = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_VFS, sizeof(*uv)); + uv->udiag_vfs_ino = dentry->d_inode->i_ino; + uv->udiag_vfs_dev = dentry->d_sb->s_dev; + } + + return 0; + +rtattr_failure: + return -EMSGSIZE; +} + +static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb) +{ + struct sock *peer; + int ino; + + peer = unix_peer_get(sk); + if (peer) { + unix_state_lock(peer); + ino = sock_i_ino(peer); + unix_state_unlock(peer); + sock_put(peer); + + RTA_PUT_U32(nlskb, UNIX_DIAG_PEER, ino); + } + + return 0; +rtattr_failure: + return -EMSGSIZE; +} + +static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) +{ + struct sk_buff *skb; + u32 *buf; + int i; + + if (sk->sk_state == TCP_LISTEN) { + spin_lock(&sk->sk_receive_queue.lock); + buf = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_ICONS, + sk->sk_receive_queue.qlen * sizeof(u32)); + i = 0; + skb_queue_walk(&sk->sk_receive_queue, skb) { + struct sock *req, *peer; + + req = skb->sk; + /* + * The state lock is outer for the same sk's + * queue lock. With the other's queue locked it's + * OK to lock the state. + */ + unix_state_lock_nested(req); + peer = unix_sk(req)->peer; + buf[i++] = (peer ? sock_i_ino(peer) : 0); + unix_state_unlock(req); + } + spin_unlock(&sk->sk_receive_queue.lock); + } + + return 0; + +rtattr_failure: + spin_unlock(&sk->sk_receive_queue.lock); + return -EMSGSIZE; +} + +static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) +{ + struct unix_diag_rqlen *rql; + + rql = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_RQLEN, sizeof(*rql)); + + if (sk->sk_state == TCP_LISTEN) { + rql->udiag_rqueue = sk->sk_receive_queue.qlen; + rql->udiag_wqueue = sk->sk_max_ack_backlog; + } else { + rql->udiag_rqueue = (__u32)unix_inq_len(sk); + rql->udiag_wqueue = (__u32)unix_outq_len(sk); + } + + return 0; + +rtattr_failure: + return -EMSGSIZE; +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, + u32 pid, u32 seq, u32 flags, int sk_ino) +{ + unsigned char *b = skb_tail_pointer(skb); + struct nlmsghdr *nlh; + struct unix_diag_msg *rep; + + nlh = NLMSG_PUT(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep)); + nlh->nlmsg_flags = flags; + + rep = NLMSG_DATA(nlh); + + rep->udiag_family = AF_UNIX; + rep->udiag_type = sk->sk_type; + rep->udiag_state = sk->sk_state; + rep->udiag_ino = sk_ino; + sock_diag_save_cookie(sk, rep->udiag_cookie); + + if ((req->udiag_show & UDIAG_SHOW_NAME) && + sk_diag_dump_name(sk, skb)) + goto nlmsg_failure; + + if ((req->udiag_show & UDIAG_SHOW_VFS) && + sk_diag_dump_vfs(sk, skb)) + goto nlmsg_failure; + + if ((req->udiag_show & UDIAG_SHOW_PEER) && + sk_diag_dump_peer(sk, skb)) + goto nlmsg_failure; + + if ((req->udiag_show & UDIAG_SHOW_ICONS) && + sk_diag_dump_icons(sk, skb)) + goto nlmsg_failure; + + if ((req->udiag_show & UDIAG_SHOW_RQLEN) && + sk_diag_show_rqlen(sk, skb)) + goto nlmsg_failure; + + if ((req->udiag_show & UDIAG_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) + goto nlmsg_failure; + + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + return skb->len; + +nlmsg_failure: + nlmsg_trim(skb, b); + return -EMSGSIZE; +} + +static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, + u32 pid, u32 seq, u32 flags) +{ + int sk_ino; + + unix_state_lock(sk); + sk_ino = sock_i_ino(sk); + unix_state_unlock(sk); + + if (!sk_ino) + return 0; + + return sk_diag_fill(sk, skb, req, pid, seq, flags, sk_ino); +} + +static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct unix_diag_req *req; + int num, s_num, slot, s_slot; + + req = NLMSG_DATA(cb->nlh); + + s_slot = cb->args[0]; + num = s_num = cb->args[1]; + + spin_lock(&unix_table_lock); + for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) { + struct sock *sk; + struct hlist_node *node; + + num = 0; + sk_for_each(sk, node, &unix_socket_table[slot]) { + if (num < s_num) + goto next; + if (!(req->udiag_states & (1 << sk->sk_state))) + goto next; + if (sk_diag_dump(sk, skb, req, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI) < 0) + goto done; +next: + num++; + } + } +done: + spin_unlock(&unix_table_lock); + cb->args[0] = slot; + cb->args[1] = num; + + return skb->len; +} + +static struct sock *unix_lookup_by_ino(int ino) +{ + int i; + struct sock *sk; + + spin_lock(&unix_table_lock); + for (i = 0; i <= UNIX_HASH_SIZE; i++) { + struct hlist_node *node; + + sk_for_each(sk, node, &unix_socket_table[i]) + if (ino == sock_i_ino(sk)) { + sock_hold(sk); + spin_unlock(&unix_table_lock); + + return sk; + } + } + + spin_unlock(&unix_table_lock); + return NULL; +} + +static int unix_diag_get_exact(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + struct unix_diag_req *req) +{ + int err = -EINVAL; + struct sock *sk; + struct sk_buff *rep; + unsigned int extra_len; + + if (req->udiag_ino == 0) + goto out_nosk; + + sk = unix_lookup_by_ino(req->udiag_ino); + err = -ENOENT; + if (sk == NULL) + goto out_nosk; + + err = sock_diag_check_cookie(sk, req->udiag_cookie); + if (err) + goto out; + + extra_len = 256; +again: + err = -ENOMEM; + rep = alloc_skb(NLMSG_SPACE((sizeof(struct unix_diag_msg) + extra_len)), + GFP_KERNEL); + if (!rep) + goto out; + + err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, 0, req->udiag_ino); + if (err < 0) { + kfree_skb(rep); + extra_len += 256; + if (extra_len >= PAGE_SIZE) + goto out; + + goto again; + } + err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + if (sk) + sock_put(sk); +out_nosk: + return err; +} + +static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct unix_diag_req); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) + return netlink_dump_start(sock_diag_nlsk, skb, h, + unix_diag_dump, NULL, 0); + else + return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h)); +} + +static struct sock_diag_handler unix_diag_handler = { + .family = AF_UNIX, + .dump = unix_diag_handler_dump, +}; + +static int __init unix_diag_init(void) +{ + return sock_diag_register(&unix_diag_handler); +} + +static void __exit unix_diag_exit(void) +{ + sock_diag_unregister(&unix_diag_handler); +} + +module_init(unix_diag_init); +module_exit(unix_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */); diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index f346395314ba..c43612ee96bb 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -81,7 +81,6 @@ static struct proc_dir_entry *proc_router; * Iterator */ static void *r_start(struct seq_file *m, loff_t *pos) - __acquires(kernel_lock) { struct wan_device *wandev; loff_t l = *pos; @@ -103,7 +102,6 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) } static void r_stop(struct seq_file *m, void *v) - __releases(kernel_lock) { mutex_unlock(&config_mutex); } diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index d5b7c3779c43..0694d62e4dbc 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -77,6 +77,7 @@ #include <linux/netdevice.h> #include <linux/wimax.h> #include <linux/security.h> +#include <linux/export.h> #include "wimax-internal.h" diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c index 68bedf3e5443..7ceffe39d70e 100644 --- a/net/wimax/op-reset.c +++ b/net/wimax/op-reset.c @@ -32,6 +32,7 @@ #include <net/genetlink.h> #include <linux/wimax.h> #include <linux/security.h> +#include <linux/export.h> #include "wimax-internal.h" #define D_SUBMODULE op_reset diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c index 2609e445fe7d..7ab60babdd22 100644 --- a/net/wimax/op-rfkill.c +++ b/net/wimax/op-rfkill.c @@ -65,6 +65,7 @@ #include <linux/wimax.h> #include <linux/security.h> #include <linux/rfkill.h> +#include <linux/export.h> #include "wimax-internal.h" #define D_SUBMODULE op_rfkill diff --git a/net/wimax/stack.c b/net/wimax/stack.c index ee99e7dfcdba..3c65eae701c4 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -55,6 +55,7 @@ #include <net/genetlink.h> #include <linux/netdevice.h> #include <linux/wimax.h> +#include <linux/module.h> #include "wimax-internal.h" diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 1f1ef70f34f2..2e4444fedbe0 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -121,15 +121,16 @@ config CFG80211_WEXT config WIRELESS_EXT_SYSFS bool "Wireless extensions sysfs files" - default y depends on WEXT_CORE && SYSFS help This option enables the deprecated wireless statistics files in /sys/class/net/*/wireless/. The same information is available via the ioctls as well. - Say Y if you have programs using it, like old versions of - hal. + Say N. If you know you have ancient tools requiring it, + like very old versions of hal (prior to 0.5.12 release), + say Y and update the tools as soon as possible as this + option will be removed soon. config LIB80211 tristate "Common routines for IEEE802.11 drivers" diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 17cd0c04d139..2fcfe0993ca2 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,6 +6,7 @@ * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> */ +#include <linux/export.h> #include <net/cfg80211.h> #include "core.h" @@ -44,9 +45,9 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev, return chan; } -static bool can_beacon_sec_chan(struct wiphy *wiphy, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type) +int cfg80211_can_beacon_sec_chan(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type) { struct ieee80211_channel *sec_chan; int diff; @@ -75,6 +76,7 @@ static bool can_beacon_sec_chan(struct wiphy *wiphy, return true; } +EXPORT_SYMBOL(cfg80211_can_beacon_sec_chan); int cfg80211_set_freq(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, int freq, @@ -109,8 +111,8 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev, switch (channel_type) { case NL80211_CHAN_HT40PLUS: case NL80211_CHAN_HT40MINUS: - if (!can_beacon_sec_chan(&rdev->wiphy, chan, - channel_type)) { + if (!cfg80211_can_beacon_sec_chan(&rdev->wiphy, chan, + channel_type)) { printk(KERN_DEBUG "cfg80211: Secondary channel not " "allowed to initiate communication\n"); diff --git a/net/wireless/core.c b/net/wireless/core.c index c14865172da7..ccdfed897651 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -492,6 +492,10 @@ int wiphy_register(struct wiphy *wiphy) !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) return -EINVAL; + if (WARN_ON(wiphy->ap_sme_capa && + !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME))) + return -EINVAL; + if (WARN_ON(wiphy->addresses && !wiphy->n_addresses)) return -EINVAL; @@ -582,7 +586,7 @@ int wiphy_register(struct wiphy *wiphy) } /* set up regulatory info */ - wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); + regulatory_update(wiphy, NL80211_REGDOM_SET_BY_CORE); list_add_rcu(&rdev->list, &cfg80211_rdev_list); cfg80211_rdev_list_generation++; diff --git a/net/wireless/core.h b/net/wireless/core.h index 8672e028022f..43ad9c81efcf 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -54,6 +54,8 @@ struct cfg80211_registered_device { int opencount; /* also protected by devlist_mtx */ wait_queue_head_t dev_wait; + u32 ap_beacons_nlpid; + /* BSSes/scanning */ spinlock_t bss_lock; struct list_head bss_list; @@ -247,12 +249,11 @@ struct cfg80211_event { u16 status; } cr; struct { - struct ieee80211_channel *channel; - u8 bssid[ETH_ALEN]; const u8 *req_ie; const u8 *resp_ie; size_t req_ie_len; size_t resp_ie_len; + struct cfg80211_bss *bss; } rm; struct { const u8 *ie; @@ -279,8 +280,6 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); -void wiphy_update_regulatory(struct wiphy *wiphy, - enum nl80211_reg_initiator setby); void cfg80211_bss_expire(struct cfg80211_registered_device *dev); void cfg80211_bss_age(struct cfg80211_registered_device *dev, @@ -341,13 +340,17 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *bssid, const u8 *prev_bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt); + struct cfg80211_crypto_settings *crypt, + u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, + struct ieee80211_ht_cap *ht_capa_mask); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *prev_bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt); + struct cfg80211_crypto_settings *crypt, + u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, + struct ieee80211_ht_cap *ht_capa_mask); int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, @@ -377,7 +380,10 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, u64 *cookie); + const u8 *buf, size_t len, bool no_cck, + bool dont_wait_for_ack, u64 *cookie); +void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, + const struct ieee80211_ht_cap *ht_capa_mask); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, @@ -396,8 +402,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, - struct ieee80211_channel *channel, - const u8 *bssid, + struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, @@ -408,6 +413,7 @@ void cfg80211_sme_failed_assoc(struct wireless_dev *wdev); bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev); /* internal helpers */ +bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index 53c143f5e770..9392f8cbb901 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -7,10 +7,17 @@ # # Copyright 2009 John W. Linville <linville@tuxdriver.com> # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. # +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. BEGIN { active = 0 diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index f33fbb79437c..30f20fe4a5fe 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -7,6 +7,7 @@ #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/slab.h> +#include <linux/export.h> #include <net/cfg80211.h> #include "wext-compat.h" #include "nl80211.h" diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index 3268fac5ab22..a55c27b75ee5 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -41,6 +41,11 @@ struct lib80211_crypto_alg { static LIST_HEAD(lib80211_crypto_algs); static DEFINE_SPINLOCK(lib80211_crypto_lock); +static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, + int force); +static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info); +static void lib80211_crypt_deinit_handler(unsigned long data); + const char *print_ssid(char *buf, const char *ssid, u8 ssid_len) { const char *s = ssid; @@ -111,7 +116,8 @@ void lib80211_crypt_info_free(struct lib80211_crypt_info *info) } EXPORT_SYMBOL(lib80211_crypt_info_free); -void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, int force) +static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, + int force) { struct lib80211_crypt_data *entry, *next; unsigned long flags; @@ -131,10 +137,9 @@ void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, int force) } spin_unlock_irqrestore(info->lock, flags); } -EXPORT_SYMBOL(lib80211_crypt_deinit_entries); /* After this, crypt_deinit_list won't accept new members */ -void lib80211_crypt_quiescing(struct lib80211_crypt_info *info) +static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info) { unsigned long flags; @@ -142,9 +147,8 @@ void lib80211_crypt_quiescing(struct lib80211_crypt_info *info) info->crypt_quiesced = 1; spin_unlock_irqrestore(info->lock, flags); } -EXPORT_SYMBOL(lib80211_crypt_quiescing); -void lib80211_crypt_deinit_handler(unsigned long data) +static void lib80211_crypt_deinit_handler(unsigned long data) { struct lib80211_crypt_info *info = (struct lib80211_crypt_info *)data; unsigned long flags; @@ -160,7 +164,6 @@ void lib80211_crypt_deinit_handler(unsigned long data) } spin_unlock_irqrestore(info->lock, flags); } -EXPORT_SYMBOL(lib80211_crypt_deinit_handler); void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, struct lib80211_crypt_data **crypt) diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index dacb3b4b1bdb..755738d26bb4 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -77,8 +77,6 @@ static void *lib80211_ccmp_init(int key_idx) priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tfm)) { - printk(KERN_DEBUG "lib80211_crypt_ccmp: could not allocate " - "crypto API aes\n"); priv->tfm = NULL; goto fail; } diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 7ea4f2b0770e..38734846c19e 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -101,7 +101,6 @@ static void *lib80211_tkip_init(int key_idx) priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_arc4)) { - printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n")); priv->tx_tfm_arc4 = NULL; goto fail; } @@ -109,7 +108,6 @@ static void *lib80211_tkip_init(int key_idx) priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_michael)) { - printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n")); priv->tx_tfm_michael = NULL; goto fail; } @@ -117,7 +115,6 @@ static void *lib80211_tkip_init(int key_idx) priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_arc4)) { - printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n")); priv->rx_tfm_arc4 = NULL; goto fail; } @@ -125,7 +122,6 @@ static void *lib80211_tkip_init(int key_idx) priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_michael)) { - printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n")); priv->rx_tfm_michael = NULL; goto fail; } diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c index 2f265e033ae2..c1304018fc1c 100644 --- a/net/wireless/lib80211_crypt_wep.c +++ b/net/wireless/lib80211_crypt_wep.c @@ -50,16 +50,12 @@ static void *lib80211_wep_init(int keyidx) priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm)) { - printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate " - "crypto API arc4\n"); priv->tx_tfm = NULL; goto fail; } priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm)) { - printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate " - "crypto API arc4\n"); priv->rx_tfm = NULL; goto fail; } diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 5c116083eeca..8c550df13037 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -1,4 +1,5 @@ #include <linux/ieee80211.h> +#include <linux/export.h> #include <net/cfg80211.h> #include "nl80211.h" #include "core.h" @@ -12,12 +13,14 @@ #define MESH_HOLD_T 100 #define MESH_PATH_TIMEOUT 5000 +#define MESH_RANN_INTERVAL 5000 /* * Minimum interval between two consecutive PREQs originated by the same * interface */ #define MESH_PREQ_MIN_INT 10 +#define MESH_PERR_MIN_INT 100 #define MESH_DIAM_TRAVERSAL_TIME 50 /* @@ -45,10 +48,13 @@ const struct mesh_config default_mesh_config = { .dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS, .dot11MeshHWMPactivePathTimeout = MESH_PATH_TIMEOUT, .dot11MeshHWMPpreqMinInterval = MESH_PREQ_MIN_INT, + .dot11MeshHWMPperrMinInterval = MESH_PERR_MIN_INT, .dot11MeshHWMPnetDiameterTraversalTime = MESH_DIAM_TRAVERSAL_TIME, .dot11MeshHWMPmaxPREQretries = MESH_MAX_PREQ_RETRIES, .path_refresh_time = MESH_PATH_REFRESH_TIME, .min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT, + .dot11MeshHWMPRannInterval = MESH_RANN_INTERVAL, + .dot11MeshGateAnnouncementProtocol = false, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 832f6574e4ed..438dfc105b4a 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -501,13 +501,32 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, return err; } +/* Do a logical ht_capa &= ht_capa_mask. */ +void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, + const struct ieee80211_ht_cap *ht_capa_mask) +{ + int i; + u8 *p1, *p2; + if (!ht_capa_mask) { + memset(ht_capa, 0, sizeof(*ht_capa)); + return; + } + + p1 = (u8*)(ht_capa); + p2 = (u8*)(ht_capa_mask); + for (i = 0; i<sizeof(*ht_capa); i++) + p1[i] &= p2[i]; +} + int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *prev_bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt) + struct cfg80211_crypto_settings *crypt, + u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, + struct ieee80211_ht_cap *ht_capa_mask) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_assoc_request req; @@ -537,6 +556,15 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, memcpy(&req.crypto, crypt, sizeof(req.crypto)); req.use_mfp = use_mfp; req.prev_bssid = prev_bssid; + req.flags = assoc_flags; + if (ht_capa) + memcpy(&req.ht_capa, ht_capa, sizeof(req.ht_capa)); + if (ht_capa_mask) + memcpy(&req.ht_capa_mask, ht_capa_mask, + sizeof(req.ht_capa_mask)); + cfg80211_oper_and_ht_capa(&req.ht_capa_mask, + rdev->wiphy.ht_capa_mod_mask); + req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); if (!req.bss) { @@ -574,14 +602,17 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, const u8 *bssid, const u8 *prev_bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt) + struct cfg80211_crypto_settings *crypt, + u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, + struct ieee80211_ht_cap *ht_capa_mask) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; wdev_lock(wdev); err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, - ssid, ssid_len, ie, ie_len, use_mfp, crypt); + ssid, ssid_len, ie, ie_len, use_mfp, crypt, + assoc_flags, ht_capa, ht_capa_mask); wdev_unlock(wdev); return err; @@ -879,6 +910,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) } spin_unlock_bh(&wdev->mgmt_registrations_lock); + + if (nlpid == wdev->ap_unexpected_nlpid) + wdev->ap_unexpected_nlpid = 0; } void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) @@ -900,7 +934,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, u64 *cookie) + const u8 *buf, size_t len, bool no_cck, + bool dont_wait_for_ack, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; const struct ieee80211_mgmt *mgmt; @@ -991,7 +1026,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, /* Transmit the Action frame as requested by user space */ return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan, channel_type, channel_type_valid, - wait, buf, len, cookie); + wait, buf, len, no_cck, dont_wait_for_ack, + cookie); } bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, @@ -1095,3 +1131,41 @@ void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); } EXPORT_SYMBOL(cfg80211_gtk_rekey_notify); + +void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, + const u8 *bssid, bool preauth, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); +} +EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); + +bool cfg80211_rx_spurious_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) + return false; + + return nl80211_unexpected_frame(dev, addr, gfp); +} +EXPORT_SYMBOL(cfg80211_rx_spurious_frame); + +bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_AP_VLAN)) + return false; + + return nl80211_unexpected_4addr_frame(dev, addr, gfp); +} +EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ea40d540a990..afeea32e04ad 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -23,6 +23,12 @@ #include "nl80211.h" #include "reg.h" +static bool nl80211_valid_auth_type(enum nl80211_auth_type auth_type); +static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, + struct genl_info *info, + struct cfg80211_crypto_settings *settings, + int cipher_limit); + static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, @@ -41,22 +47,21 @@ static struct genl_family nl80211_fam = { }; /* internal helper: get rdev and dev */ -static int get_rdev_dev_by_info_ifindex(struct genl_info *info, - struct cfg80211_registered_device **rdev, - struct net_device **dev) +static int get_rdev_dev_by_ifindex(struct net *netns, struct nlattr **attrs, + struct cfg80211_registered_device **rdev, + struct net_device **dev) { - struct nlattr **attrs = info->attrs; int ifindex; if (!attrs[NL80211_ATTR_IFINDEX]) return -EINVAL; ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); - *dev = dev_get_by_index(genl_info_net(info), ifindex); + *dev = dev_get_by_index(netns, ifindex); if (!*dev) return -ENODEV; - *rdev = cfg80211_get_dev_from_ifindex(genl_info_net(info), ifindex); + *rdev = cfg80211_get_dev_from_ifindex(netns, ifindex); if (IS_ERR(*rdev)) { dev_put(*dev); return PTR_ERR(*rdev); @@ -83,8 +88,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, - [NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN }, - [NL80211_ATTR_PREV_BSSID] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [NL80211_ATTR_MAC] = { .len = ETH_ALEN }, + [NL80211_ATTR_PREV_BSSID] = { .len = ETH_ALEN }, [NL80211_ATTR_KEY] = { .type = NLA_NESTED, }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, @@ -92,7 +97,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_KEY_IDX] = { .type = NLA_U8 }, [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 }, [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG }, - [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, + [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 }, [NL80211_ATTR_KEY_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 }, @@ -126,8 +131,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED }, [NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG }, - [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, - .len = NL80211_HT_CAPABILITY_LEN }, + [NL80211_ATTR_HT_CAPABILITY] = { .len = NL80211_HT_CAPABILITY_LEN }, [NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 }, [NL80211_ATTR_IE] = { .type = NLA_BINARY, @@ -178,6 +182,28 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_REKEY_DATA] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SUPP_RATES] = { .type = NLA_NESTED }, + [NL80211_ATTR_HIDDEN_SSID] = { .type = NLA_U32 }, + [NL80211_ATTR_IE_PROBE_RESP] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_IE_ASSOC_RESP] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_ROAM_SUPPORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_SCHED_SCAN_MATCH] = { .type = NLA_NESTED }, + [NL80211_ATTR_TX_NO_CCK_RATE] = { .type = NLA_FLAG }, + [NL80211_ATTR_TDLS_ACTION] = { .type = NLA_U8 }, + [NL80211_ATTR_TDLS_DIALOG_TOKEN] = { .type = NLA_U8 }, + [NL80211_ATTR_TDLS_OPERATION] = { .type = NLA_U8 }, + [NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG }, + [NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG }, + [NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_DFS_REGION] = { .type = NLA_U8 }, + [NL80211_ATTR_DISABLE_HT] = { .type = NLA_FLAG }, + [NL80211_ATTR_HT_CAPABILITY_MASK] = { + .len = NL80211_HT_CAPABILITY_LEN + }, + [NL80211_ATTR_NOACK_MAP] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -185,7 +211,7 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_KEY_IDX] = { .type = NLA_U8 }, [NL80211_KEY_CIPHER] = { .type = NLA_U32 }, - [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, + [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 }, [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG }, [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, [NL80211_KEY_TYPE] = { .type = NLA_U32 }, @@ -220,6 +246,12 @@ nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = { [NL80211_REKEY_DATA_REPLAY_CTR] = { .len = NL80211_REPLAY_CTR_LEN }, }; +static const struct nla_policy +nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = { + [NL80211_ATTR_SCHED_SCAN_MATCH_SSID] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_SSID_LEN }, +}; + /* ifidx get helper */ static int nl80211_get_ifidx(struct netlink_callback *cb) { @@ -703,11 +735,21 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.max_scan_ie_len); NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, dev->wiphy.max_sched_scan_ie_len); + NLA_PUT_U8(msg, NL80211_ATTR_MAX_MATCH_SETS, + dev->wiphy.max_match_sets); if (dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_IBSS_RSN); if (dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_MESH_AUTH); + if (dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) + NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_AP_UAPSD); + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) + NLA_PUT_FLAG(msg, NL80211_ATTR_ROAM_SUPPORT); + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) + NLA_PUT_FLAG(msg, NL80211_ATTR_TDLS_SUPPORT); + if (dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) + NLA_PUT_FLAG(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP); NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES, sizeof(u32) * dev->wiphy.n_cipher_suites, @@ -724,6 +766,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, dev->wiphy.available_antennas_rx); + if (dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) + NLA_PUT_U32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, + dev->wiphy.probe_resp_offload); + if ((dev->wiphy.available_antennas_tx || dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) { u32 tx_ant = 0, rx_ant = 0; @@ -840,7 +886,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); - CMD(remain_on_channel, REMAIN_ON_CHANNEL); + if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) + CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); @@ -850,8 +897,22 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, } CMD(set_channel, SET_CHANNEL); CMD(set_wds_peer, SET_WDS_PEER); + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { + CMD(tdls_mgmt, TDLS_MGMT); + CMD(tdls_oper, TDLS_OPER); + } if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) CMD(sched_scan_start, START_SCHED_SCAN); + CMD(probe_client, PROBE_CLIENT); + CMD(set_noack_map, SET_NOACK_MAP); + if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { + i++; + NLA_PUT_U32(msg, i, NL80211_CMD_REGISTER_BEACONS); + } + +#ifdef CONFIG_NL80211_TESTMODE + CMD(testmode_cmd, TESTMODE); +#endif #undef CMD @@ -867,12 +928,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); - if (dev->ops->remain_on_channel) + if (dev->ops->remain_on_channel && + dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) NLA_PUT_U32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, dev->wiphy.max_remain_on_channel_duration); - /* for now at least assume all drivers have it */ - if (dev->ops->mgmt_tx) + if (dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK); if (mgmt_stypes) { @@ -970,6 +1031,17 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nl80211_put_iface_combinations(&dev->wiphy, msg)) goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) + NLA_PUT_U32(msg, NL80211_ATTR_DEVICE_AP_SME, + dev->wiphy.ap_sme_capa); + + NLA_PUT_U32(msg, NL80211_ATTR_FEATURE_FLAGS, dev->wiphy.features); + + if (dev->wiphy.ht_capa_mod_mask) + NLA_PUT(msg, NL80211_ATTR_HT_CAPABILITY_MASK, + sizeof(*dev->wiphy.ht_capa_mod_mask), + dev->wiphy.ht_capa_mod_mask); + return genlmsg_end(msg, hdr); nla_put_failure: @@ -1210,6 +1282,17 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + if (!netdev) { + result = -EINVAL; + goto bad_res; + } + + if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { + result = -EINVAL; + goto bad_res; + } + nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], rem_txq_params) { @@ -1222,6 +1305,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; result = rdev->ops->set_txq_params(&rdev->wiphy, + netdev, &txq_params); if (result) goto bad_res; @@ -1676,6 +1760,23 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) return rdev->ops->del_virtual_intf(&rdev->wiphy, dev); } +static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + u16 noack_map; + + if (!info->attrs[NL80211_ATTR_NOACK_MAP]) + return -EINVAL; + + if (!rdev->ops->set_noack_map) + return -EOPNOTSUPP; + + noack_map = nla_get_u16(info->attrs[NL80211_ATTR_NOACK_MAP]); + + return rdev->ops->set_noack_map(&rdev->wiphy, dev, noack_map); +} + struct get_key_cookie { struct sk_buff *msg; int error; @@ -1985,7 +2086,10 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) struct beacon_parameters params; int haveinfo = 0, err; - if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL])) + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL]) || + !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]) || + !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_PROBE_RESP]) || + !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_ASSOC_RESP])) return -EINVAL; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && @@ -2011,6 +2115,49 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) if (err) return err; + /* + * In theory, some of these attributes could be required for + * NEW_BEACON, but since they were not used when the command was + * originally added, keep them optional for old user space + * programs to work with drivers that do not need the additional + * information. + */ + if (info->attrs[NL80211_ATTR_SSID]) { + params.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + params.ssid_len = + nla_len(info->attrs[NL80211_ATTR_SSID]); + if (params.ssid_len == 0 || + params.ssid_len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) { + params.hidden_ssid = nla_get_u32( + info->attrs[NL80211_ATTR_HIDDEN_SSID]); + if (params.hidden_ssid != + NL80211_HIDDEN_SSID_NOT_IN_USE && + params.hidden_ssid != + NL80211_HIDDEN_SSID_ZERO_LEN && + params.hidden_ssid != + NL80211_HIDDEN_SSID_ZERO_CONTENTS) + return -EINVAL; + } + + params.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; + + if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { + params.auth_type = nla_get_u32( + info->attrs[NL80211_ATTR_AUTH_TYPE]); + if (!nl80211_valid_auth_type(params.auth_type)) + return -EINVAL; + } else + params.auth_type = NL80211_AUTHTYPE_AUTOMATIC; + + err = nl80211_crypto_settings(rdev, info, ¶ms.crypto, + NL80211_MAX_NR_CIPHER_SUITES); + if (err) + return err; + call = rdev->ops->add_beacon; break; case NL80211_CMD_SET_BEACON: @@ -2041,6 +2188,32 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) if (!haveinfo) return -EINVAL; + if (info->attrs[NL80211_ATTR_IE]) { + params.beacon_ies = nla_data(info->attrs[NL80211_ATTR_IE]); + params.beacon_ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + if (info->attrs[NL80211_ATTR_IE_PROBE_RESP]) { + params.proberesp_ies = + nla_data(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); + params.proberesp_ies_len = + nla_len(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); + } + + if (info->attrs[NL80211_ATTR_IE_ASSOC_RESP]) { + params.assocresp_ies = + nla_data(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); + params.assocresp_ies_len = + nla_len(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); + } + + if (info->attrs[NL80211_ATTR_PROBE_RESP]) { + params.probe_resp = + nla_data(info->attrs[NL80211_ATTR_PROBE_RESP]); + params.probe_resp_len = + nla_len(info->attrs[NL80211_ATTR_PROBE_RESP]); + } + err = call(&rdev->wiphy, dev, ¶ms); if (!err && params.interval) wdev->beacon_interval = params.interval; @@ -2073,9 +2246,11 @@ static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { [NL80211_STA_FLAG_WME] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_MFP] = { .type = NLA_FLAG }, [NL80211_STA_FLAG_AUTHENTICATED] = { .type = NLA_FLAG }, + [NL80211_STA_FLAG_TDLS_PEER] = { .type = NLA_FLAG }, }; static int parse_station_flags(struct genl_info *info, + enum nl80211_iftype iftype, struct station_parameters *params) { struct nlattr *flags[NL80211_STA_FLAG_MAX + 1]; @@ -2109,8 +2284,33 @@ static int parse_station_flags(struct genl_info *info, nla, sta_flags_policy)) return -EINVAL; - params->sta_flags_mask = (1 << __NL80211_STA_FLAG_AFTER_LAST) - 1; - params->sta_flags_mask &= ~1; + /* + * Only allow certain flags for interface types so that + * other attributes are silently ignored. Remember that + * this is backward compatibility code with old userspace + * and shouldn't be hit in other cases anyway. + */ + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_GO: + params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_TDLS_PEER); + break; + case NL80211_IFTYPE_MESH_POINT: + params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_AUTHORIZED); + default: + return -EINVAL; + } for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++) if (flags[flag]) @@ -2216,6 +2416,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_TX_FAILED) NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED, sinfo->tx_failed); + if (sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) + NLA_PUT_U32(msg, NL80211_STA_INFO_BEACON_LOSS, + sinfo->beacon_loss_count); if (sinfo->filled & STATION_INFO_BSS_PARAM) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) @@ -2235,8 +2438,16 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, nla_nest_end(msg, bss_param); } + if (sinfo->filled & STATION_INFO_STA_FLAGS) + NLA_PUT(msg, NL80211_STA_INFO_STA_FLAGS, + sizeof(struct nl80211_sta_flag_update), + &sinfo->sta_flags); nla_nest_end(msg, sinfoattr); + if (sinfo->filled & STATION_INFO_ASSOC_REQ_IES) + NLA_PUT(msg, NL80211_ATTR_IE, sinfo->assoc_req_ies_len, + sinfo->assoc_req_ies); + return genlmsg_end(msg, hdr); nla_put_failure: @@ -2264,6 +2475,7 @@ static int nl80211_dump_station(struct sk_buff *skb, } while (1) { + memset(&sinfo, 0, sizeof(sinfo)); err = dev->ops->dump_station(&dev->wiphy, netdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) @@ -2330,26 +2542,34 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) /* * Get vlan interface making sure it is running and on the right wiphy. */ -static int get_vlan(struct genl_info *info, - struct cfg80211_registered_device *rdev, - struct net_device **vlan) +static struct net_device *get_vlan(struct genl_info *info, + struct cfg80211_registered_device *rdev) { struct nlattr *vlanattr = info->attrs[NL80211_ATTR_STA_VLAN]; - *vlan = NULL; - - if (vlanattr) { - *vlan = dev_get_by_index(genl_info_net(info), - nla_get_u32(vlanattr)); - if (!*vlan) - return -ENODEV; - if (!(*vlan)->ieee80211_ptr) - return -EINVAL; - if ((*vlan)->ieee80211_ptr->wiphy != &rdev->wiphy) - return -EINVAL; - if (!netif_running(*vlan)) - return -ENETDOWN; + struct net_device *v; + int ret; + + if (!vlanattr) + return NULL; + + v = dev_get_by_index(genl_info_net(info), nla_get_u32(vlanattr)); + if (!v) + return ERR_PTR(-ENODEV); + + if (!v->ieee80211_ptr || v->ieee80211_ptr->wiphy != &rdev->wiphy) { + ret = -EINVAL; + goto error; } - return 0; + + if (!netif_running(v)) { + ret = -ENETDOWN; + goto error; + } + + return v; + error: + dev_put(v); + return ERR_PTR(ret); } static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) @@ -2388,7 +2608,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); - if (parse_station_flags(info, ¶ms)) + if (!rdev->ops->change_station) + return -EOPNOTSUPP; + + if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) @@ -2399,72 +2622,96 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_state = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); - err = get_vlan(info, rdev, ¶ms.vlan); - if (err) - goto out; - - /* validate settings */ - err = 0; - switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: /* disallow mesh-specific things */ if (params.plink_action) - err = -EINVAL; + return -EINVAL; + + /* TDLS can't be set, ... */ + if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + return -EINVAL; + /* + * ... but don't bother the driver with it. This works around + * a hostapd/wpa_supplicant issue -- it always includes the + * TLDS_PEER flag in the mask even for AP mode. + */ + params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + + /* accept only the listed bits */ + if (params.sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP))) + return -EINVAL; + + /* must be last in here for error handling */ + params.vlan = get_vlan(info, rdev); + if (IS_ERR(params.vlan)) + return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - /* disallow everything but AUTHORIZED flag */ + /* disallow things sta doesn't support */ if (params.plink_action) - err = -EINVAL; - if (params.vlan) - err = -EINVAL; - if (params.supported_rates) - err = -EINVAL; + return -EINVAL; if (params.ht_capa) - err = -EINVAL; + return -EINVAL; if (params.listen_interval >= 0) - err = -EINVAL; + return -EINVAL; + /* + * Don't allow userspace to change the TDLS_PEER flag, + * but silently ignore attempts to change it since we + * don't have state here to verify that it doesn't try + * to change the flag. + */ + params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + + /* reject any changes other than AUTHORIZED */ if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) - err = -EINVAL; + return -EINVAL; break; case NL80211_IFTYPE_MESH_POINT: /* disallow things mesh doesn't support */ if (params.vlan) - err = -EINVAL; + return -EINVAL; if (params.ht_capa) - err = -EINVAL; + return -EINVAL; if (params.listen_interval >= 0) - err = -EINVAL; + return -EINVAL; + /* + * No special handling for TDLS here -- the userspace + * mesh code doesn't have this bug. + */ if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHORIZED))) - err = -EINVAL; + return -EINVAL; break; default: - err = -EINVAL; + return -EOPNOTSUPP; } - if (err) - goto out; - - if (!rdev->ops->change_station) { - err = -EOPNOTSUPP; - goto out; - } + /* be aware of params.vlan when changing code here */ err = rdev->ops->change_station(&rdev->wiphy, dev, mac_addr, ¶ms); - out: if (params.vlan) dev_put(params.vlan); return err; } +static struct nla_policy +nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = { + [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 }, + [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, +}; + static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -2507,30 +2754,81 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); - if (parse_station_flags(info, ¶ms)) - return -EINVAL; + if (!rdev->ops->add_station) + return -EOPNOTSUPP; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; - err = get_vlan(info, rdev, ¶ms.vlan); - if (err) - goto out; + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_GO: + /* parse WME attributes if sta is WME capable */ + if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && + (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) && + info->attrs[NL80211_ATTR_STA_WME]) { + struct nlattr *tb[NL80211_STA_WME_MAX + 1]; + struct nlattr *nla; + + nla = info->attrs[NL80211_ATTR_STA_WME]; + err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, + nl80211_sta_wme_policy); + if (err) + return err; - /* validate settings */ - err = 0; + if (tb[NL80211_STA_WME_UAPSD_QUEUES]) + params.uapsd_queues = + nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]); + if (params.uapsd_queues & + ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) + return -EINVAL; - if (!rdev->ops->add_station) { - err = -EOPNOTSUPP; - goto out; + if (tb[NL80211_STA_WME_MAX_SP]) + params.max_sp = + nla_get_u8(tb[NL80211_STA_WME_MAX_SP]); + + if (params.max_sp & + ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK) + return -EINVAL; + + params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD; + } + /* TDLS peers cannot be added */ + if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + return -EINVAL; + /* but don't bother the driver with it */ + params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + + /* must be last in here for error handling */ + params.vlan = get_vlan(info, rdev); + if (IS_ERR(params.vlan)) + return PTR_ERR(params.vlan); + break; + case NL80211_IFTYPE_MESH_POINT: + /* TDLS peers cannot be added */ + if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + return -EINVAL; + break; + case NL80211_IFTYPE_STATION: + /* Only TDLS peers can be added */ + if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) + return -EINVAL; + /* Can only add if TDLS ... */ + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -EOPNOTSUPP; + /* ... with external setup is supported */ + if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP)) + return -EOPNOTSUPP; + break; + default: + return -EOPNOTSUPP; } + /* be aware of params.vlan when changing code here */ + err = rdev->ops->add_station(&rdev->wiphy, dev, mac_addr, ¶ms); - out: if (params.vlan) dev_put(params.vlan); return err; @@ -2951,10 +3249,16 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, cur_params.dot11MeshHWMPactivePathTimeout); NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, cur_params.dot11MeshHWMPpreqMinInterval); + NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + cur_params.dot11MeshHWMPperrMinInterval); NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, cur_params.dot11MeshHWMPnetDiameterTraversalTime); NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_ROOTMODE, cur_params.dot11MeshHWMPRootMode); + NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_RANN_INTERVAL, + cur_params.dot11MeshHWMPRannInterval); + NLA_PUT_U8(msg, NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + cur_params.dot11MeshGateAnnouncementProtocol); nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); @@ -2981,7 +3285,11 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_ROOTMODE] = { .type = NLA_U8 }, + [NL80211_MESHCONF_HWMP_RANN_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_GATE_ANNOUNCEMENTS] = { .type = NLA_U8 }, }; static const struct nla_policy @@ -3052,6 +3360,9 @@ do {\ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, + mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPnetDiameterTraversalTime, mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, @@ -3060,6 +3371,14 @@ do {\ dot11MeshHWMPRootMode, mask, NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, + dot11MeshHWMPRannInterval, mask, + NL80211_MESHCONF_HWMP_RANN_INTERVAL, + nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, + dot11MeshGateAnnouncementProtocol, mask, + NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + nla_get_u8); if (mask_out) *mask_out = mask; @@ -3166,6 +3485,9 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, cfg80211_regdomain->alpha2); + if (cfg80211_regdomain->dfs_region) + NLA_PUT_U8(msg, NL80211_ATTR_DFS_REGION, + cfg80211_regdomain->dfs_region); nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) @@ -3224,6 +3546,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) char *alpha2 = NULL; int rem_reg_rules = 0, r = 0; u32 num_rules = 0, rule_idx = 0, size_of_regd; + u8 dfs_region = 0; struct ieee80211_regdomain *rd = NULL; if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) @@ -3234,6 +3557,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) alpha2 = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + if (info->attrs[NL80211_ATTR_DFS_REGION]) + dfs_region = nla_get_u8(info->attrs[NL80211_ATTR_DFS_REGION]); + nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { num_rules++; @@ -3261,6 +3587,13 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) rd->alpha2[0] = alpha2[0]; rd->alpha2[1] = alpha2[1]; + /* + * Disable DFS master mode if the DFS region was + * not supported or known on this kernel. + */ + if (reg_supported_dfs_region(dfs_region)) + rd->dfs_region = dfs_region; + nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, @@ -3477,6 +3810,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } } + request->no_cck = + nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); + request->dev = dev; request->wiphy = &rdev->wiphy; @@ -3503,10 +3839,11 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct nlattr *attr; struct wiphy *wiphy; - int err, tmp, n_ssids = 0, n_channels, i; + int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i; u32 interval; enum ieee80211_band band; size_t ie_len; + struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1]; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || !rdev->ops->sched_scan_start) @@ -3545,6 +3882,15 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (n_ssids > wiphy->max_sched_scan_ssids) return -EINVAL; + if (info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH]) + nla_for_each_nested(attr, + info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH], + tmp) + n_match_sets++; + + if (n_match_sets > wiphy->max_match_sets) + return -EINVAL; + if (info->attrs[NL80211_ATTR_IE]) ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); else @@ -3562,6 +3908,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request = kzalloc(sizeof(*request) + sizeof(*request->ssids) * n_ssids + + sizeof(*request->match_sets) * n_match_sets + sizeof(*request->channels) * n_channels + ie_len, GFP_KERNEL); if (!request) { @@ -3579,6 +3926,18 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->ie = (void *)(request->channels + n_channels); } + if (n_match_sets) { + if (request->ie) + request->match_sets = (void *)(request->ie + ie_len); + else if (request->ssids) + request->match_sets = + (void *)(request->ssids + n_ssids); + else + request->match_sets = + (void *)(request->channels + n_channels); + } + request->n_match_sets = n_match_sets; + i = 0; if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { /* user specified, bail out if channel not found */ @@ -3643,6 +4002,31 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, } } + i = 0; + if (info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH]) { + nla_for_each_nested(attr, + info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH], + tmp) { + struct nlattr *ssid; + + nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, + nla_data(attr), nla_len(attr), + nl80211_match_policy); + ssid = tb[NL80211_ATTR_SCHED_SCAN_MATCH_SSID]; + if (ssid) { + if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) { + err = -EINVAL; + goto out_free; + } + memcpy(request->match_sets[i].ssid.ssid, + nla_data(ssid), nla_len(ssid)); + request->match_sets[i].ssid.ssid_len = + nla_len(ssid); + } + i++; + } + } + if (info->attrs[NL80211_ATTR_IE]) { request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); memcpy((void *)request->ie, @@ -3935,22 +4319,6 @@ static bool nl80211_valid_wpa_versions(u32 wpa_versions) NL80211_WPA_VERSION_2)); } -static bool nl80211_valid_akm_suite(u32 akm) -{ - return akm == WLAN_AKM_SUITE_8021X || - akm == WLAN_AKM_SUITE_PSK; -} - -static bool nl80211_valid_cipher_suite(u32 cipher) -{ - return cipher == WLAN_CIPHER_SUITE_WEP40 || - cipher == WLAN_CIPHER_SUITE_WEP104 || - cipher == WLAN_CIPHER_SUITE_TKIP || - cipher == WLAN_CIPHER_SUITE_CCMP || - cipher == WLAN_CIPHER_SUITE_AES_CMAC; -} - - static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4083,7 +4451,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, memcpy(settings->ciphers_pairwise, data, len); for (i = 0; i < settings->n_ciphers_pairwise; i++) - if (!nl80211_valid_cipher_suite( + if (!cfg80211_supported_cipher_suite( + &rdev->wiphy, settings->ciphers_pairwise[i])) return -EINVAL; } @@ -4091,7 +4460,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, if (info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]) { settings->cipher_group = nla_get_u32(info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]); - if (!nl80211_valid_cipher_suite(settings->cipher_group)) + if (!cfg80211_supported_cipher_suite(&rdev->wiphy, + settings->cipher_group)) return -EINVAL; } @@ -4104,7 +4474,7 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, if (info->attrs[NL80211_ATTR_AKM_SUITES]) { void *data; - int len, i; + int len; data = nla_data(info->attrs[NL80211_ATTR_AKM_SUITES]); len = nla_len(info->attrs[NL80211_ATTR_AKM_SUITES]); @@ -4117,10 +4487,6 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, return -EINVAL; memcpy(settings->akm_suites, data, len); - - for (i = 0; i < settings->n_akm_suites; i++) - if (!nl80211_valid_akm_suite(settings->akm_suites[i])) - return -EINVAL; } return 0; @@ -4135,6 +4501,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; int err, ssid_len, ie_len = 0; bool use_mfp = false; + u32 flags = 0; + struct ieee80211_ht_cap *ht_capa = NULL; + struct ieee80211_ht_cap *ht_capa_mask = NULL; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -4178,11 +4547,25 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_PREV_BSSID]) prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT])) + flags |= ASSOC_REQ_DISABLE_HT; + + if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) + ht_capa_mask = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]); + + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) { + if (!ht_capa_mask) + return -EINVAL; + ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + } + err = nl80211_crypto_settings(rdev, info, &crypto, 1); if (!err) err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, ssid, ssid_len, ie, ie_len, use_mfp, - &crypto); + &crypto, flags, ht_capa, + ht_capa_mask); return err; } @@ -4339,8 +4722,12 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) wiphy = &rdev->wiphy; - if (info->attrs[NL80211_ATTR_MAC]) + if (info->attrs[NL80211_ATTR_MAC]) { ibss.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (!is_valid_ether_addr(ibss.bssid)) + return -EINVAL; + } ibss.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); ibss.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); @@ -4349,13 +4736,41 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } - ibss.channel = ieee80211_get_channel(wiphy, - nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + enum nl80211_channel_type channel_type; + + channel_type = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + if (channel_type != NL80211_CHAN_NO_HT && + channel_type != NL80211_CHAN_HT20 && + channel_type != NL80211_CHAN_HT40MINUS && + channel_type != NL80211_CHAN_HT40PLUS) + return -EINVAL; + + if (channel_type != NL80211_CHAN_NO_HT && + !(wiphy->features & NL80211_FEATURE_HT_IBSS)) + return -EINVAL; + + ibss.channel_type = channel_type; + } else { + ibss.channel_type = NL80211_CHAN_NO_HT; + } + + ibss.channel = rdev_freq_to_chan(rdev, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]), + ibss.channel_type); if (!ibss.channel || ibss.channel->flags & IEEE80211_CHAN_NO_IBSS || ibss.channel->flags & IEEE80211_CHAN_DISABLED) return -EINVAL; + /* Both channels should be able to initiate communication */ + if ((ibss.channel_type == NL80211_CHAN_HT40PLUS || + ibss.channel_type == NL80211_CHAN_HT40MINUS) && + !cfg80211_can_beacon_sec_chan(&rdev->wiphy, ibss.channel, + ibss.channel_type)) + return -EINVAL; + ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; @@ -4434,7 +4849,7 @@ static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) static int nl80211_testmode_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; int err; long phy_idx; void *data = NULL; @@ -4452,9 +4867,21 @@ static int nl80211_testmode_dump(struct sk_buff *skb, nl80211_policy); if (err) return err; - if (!nl80211_fam.attrbuf[NL80211_ATTR_WIPHY]) - return -EINVAL; - phy_idx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_WIPHY]); + if (nl80211_fam.attrbuf[NL80211_ATTR_WIPHY]) { + phy_idx = nla_get_u32( + nl80211_fam.attrbuf[NL80211_ATTR_WIPHY]); + } else { + struct net_device *netdev; + + err = get_rdev_dev_by_ifindex(sock_net(skb->sk), + nl80211_fam.attrbuf, + &rdev, &netdev); + if (err) + return err; + dev_put(netdev); + phy_idx = rdev->wiphy_idx; + cfg80211_unlock_rdev(rdev); + } if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]) cb->args[1] = (long)nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]; @@ -4466,15 +4893,15 @@ static int nl80211_testmode_dump(struct sk_buff *skb, } mutex_lock(&cfg80211_mutex); - dev = cfg80211_rdev_by_wiphy_idx(phy_idx); - if (!dev) { + rdev = cfg80211_rdev_by_wiphy_idx(phy_idx); + if (!rdev) { mutex_unlock(&cfg80211_mutex); return -ENOENT; } - cfg80211_lock_rdev(dev); + cfg80211_lock_rdev(rdev); mutex_unlock(&cfg80211_mutex); - if (!dev->ops->testmode_dump) { + if (!rdev->ops->testmode_dump) { err = -EOPNOTSUPP; goto out_err; } @@ -4485,7 +4912,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, NL80211_CMD_TESTMODE); struct nlattr *tmdata; - if (nla_put_u32(skb, NL80211_ATTR_WIPHY, dev->wiphy_idx) < 0) { + if (nla_put_u32(skb, NL80211_ATTR_WIPHY, phy_idx) < 0) { genlmsg_cancel(skb, hdr); break; } @@ -4495,8 +4922,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, genlmsg_cancel(skb, hdr); break; } - err = dev->ops->testmode_dump(&dev->wiphy, skb, cb, - data, data_len); + err = rdev->ops->testmode_dump(&rdev->wiphy, skb, cb, + data, data_len); nla_nest_end(skb, tmdata); if (err == -ENOBUFS || err == -ENOENT) { @@ -4514,7 +4941,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, /* see above */ cb->args[0] = phy_idx + 1; out_err: - cfg80211_unlock_rdev(dev); + cfg80211_unlock_rdev(rdev); return err; } @@ -4668,6 +5095,22 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(connkeys); } + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT])) + connect.flags |= ASSOC_REQ_DISABLE_HT; + + if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) + memcpy(&connect.ht_capa_mask, + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]), + sizeof(connect.ht_capa_mask)); + + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) { + if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) + return -EINVAL; + memcpy(&connect.ht_capa, + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]), + sizeof(connect.ht_capa)); + } + err = cfg80211_connect(rdev, dev, &connect, connkeys); if (err) kfree(connkeys); @@ -4777,6 +5220,57 @@ static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) return rdev->ops->flush_pmksa(&rdev->wiphy, dev); } +static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + u8 action_code, dialog_token; + u16 status_code; + u8 *peer; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) || + !rdev->ops->tdls_mgmt) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_TDLS_ACTION] || + !info->attrs[NL80211_ATTR_STATUS_CODE] || + !info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN] || + !info->attrs[NL80211_ATTR_IE] || + !info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]); + status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); + dialog_token = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]); + + return rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, action_code, + dialog_token, status_code, + nla_data(info->attrs[NL80211_ATTR_IE]), + nla_len(info->attrs[NL80211_ATTR_IE])); +} + +static int nl80211_tdls_oper(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + enum nl80211_tdls_operation operation; + u8 *peer; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) || + !rdev->ops->tdls_oper) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_TDLS_OPERATION] || + !info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + operation = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_OPERATION]); + peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + + return rdev->ops->tdls_oper(&rdev->wiphy, dev, peer, operation); +} + static int nl80211_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { @@ -4804,7 +5298,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, duration > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; - if (!rdev->ops->remain_on_channel) + if (!rdev->ops->remain_on_channel || + !(rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { @@ -4992,11 +5487,13 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) bool channel_type_valid = false; u32 freq; int err; - void *hdr; + void *hdr = NULL; u64 cookie; - struct sk_buff *msg; + struct sk_buff *msg = NULL; unsigned int wait = 0; - bool offchan; + bool offchan, no_cck, dont_wait_for_ack; + + dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK]; if (!info->attrs[NL80211_ATTR_FRAME] || !info->attrs[NL80211_ATTR_WIPHY_FREQ]) @@ -5015,7 +5512,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_DURATION]) { - if (!rdev->ops->mgmt_tx_cancel_wait) + if (!(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) return -EINVAL; wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); } @@ -5033,34 +5530,46 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; + if (offchan && !(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) + return -EINVAL; + + no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); chan = rdev_freq_to_chan(rdev, freq, channel_type); if (chan == NULL) return -EINVAL; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + if (!dont_wait_for_ack) { + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, - NL80211_CMD_FRAME); + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_FRAME); - if (IS_ERR(hdr)) { - err = PTR_ERR(hdr); - goto free_msg; + if (IS_ERR(hdr)) { + err = PTR_ERR(hdr); + goto free_msg; + } } + err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type, channel_type_valid, wait, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), - &cookie); + no_cck, dont_wait_for_ack, &cookie); if (err) goto free_msg; - NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + if (msg) { + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); - genlmsg_end(msg, hdr); - return genlmsg_reply(msg, info); + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + } + + return 0; nla_put_failure: err = -ENOBUFS; @@ -5258,6 +5767,11 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); setup.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + if (info->attrs[NL80211_ATTR_MCAST_RATE] && + !nl80211_parse_mcast_rate(rdev, setup.mcast_rate, + nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) + return -EINVAL; + if (info->attrs[NL80211_ATTR_MESH_SETUP]) { /* parse additional setup parameters if given */ err = nl80211_parse_mesh_setup(info, &setup); @@ -5550,6 +6064,91 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_register_unexpected_frame(struct sk_buff *skb, + struct genl_info *info) +{ + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + + if (wdev->ap_unexpected_nlpid) + return -EBUSY; + + wdev->ap_unexpected_nlpid = info->snd_pid; + return 0; +} + +static int nl80211_probe_client(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct sk_buff *msg; + void *hdr; + const u8 *addr; + u64 cookie; + int err; + + if (wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + if (!rdev->ops->probe_client) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_PROBE_CLIENT); + + if (IS_ERR(hdr)) { + err = PTR_ERR(hdr); + goto free_msg; + } + + addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + err = rdev->ops->probe_client(&rdev->wiphy, dev, addr, &cookie); + if (err) + goto free_msg; + + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); + + nla_put_failure: + err = -ENOBUFS; + free_msg: + nlmsg_free(msg); + return err; +} + +static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS)) + return -EOPNOTSUPP; + + if (rdev->ap_beacons_nlpid) + return -EBUSY; + + rdev->ap_beacons_nlpid = info->snd_pid; + + return 0; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -5577,7 +6176,8 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, } info->user_ptr[0] = rdev; } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { - err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + err = get_rdev_dev_by_ifindex(genl_info_net(info), info->attrs, + &rdev, &dev); if (err) { if (rtnl) rtnl_unlock(); @@ -6089,6 +6689,55 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_TDLS_MGMT, + .doit = nl80211_tdls_mgmt, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_TDLS_OPER, + .doit = nl80211_tdls_oper, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_UNEXPECTED_FRAME, + .doit = nl80211_register_unexpected_frame, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_PROBE_CLIENT, + .doit = nl80211_probe_client, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_REGISTER_BEACONS, + .doit = nl80211_register_beacons, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_SET_NOACK_MAP, + .doit = nl80211_set_noack_map, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, + }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -6341,10 +6990,7 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) if (wiphy_idx_valid(request->wiphy_idx)) NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); rcu_read_lock(); genlmsg_multicast_allns(msg, 0, nl80211_regulatory_mcgrp.id, @@ -6380,10 +7026,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -6464,10 +7107,7 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, NLA_PUT_FLAG(msg, NL80211_ATTR_TIMED_OUT); NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -6523,10 +7163,7 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, if (resp_ie) NLA_PUT(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -6564,10 +7201,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, if (resp_ie) NLA_PUT(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -6605,10 +7239,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, if (ie) NLA_PUT(msg, NL80211_ATTR_IE, ie_len, ie); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); @@ -6641,10 +7272,7 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -6679,10 +7307,7 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, if (ie_len && ie) NLA_PUT(msg, NL80211_ATTR_IE, ie_len , ie); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -6721,10 +7346,7 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, if (tsc) NLA_PUT(msg, NL80211_ATTR_KEY_SEQ, 6, tsc); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -6775,10 +7397,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, goto nla_put_failure; nla_nest_end(msg, nl_freq); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); rcu_read_lock(); genlmsg_multicast_allns(msg, 0, nl80211_regulatory_mcgrp.id, @@ -6821,10 +7440,7 @@ static void nl80211_send_remain_on_chan_event( if (cmd == NL80211_CMD_REMAIN_ON_CHANNEL) NLA_PUT_U32(msg, NL80211_ATTR_DURATION, duration); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -6895,10 +7511,7 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -6909,13 +7522,68 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, + const u8 *addr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err; + u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid); + + if (!nlpid) + return false; + + msg = nlmsg_new(100, gfp); + if (!msg) + return true; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); + if (!hdr) { + nlmsg_free(msg); + return true; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr); + + err = genlmsg_end(msg, hdr); + if (err < 0) { + nlmsg_free(msg); + return true; + } + + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + return true; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + return true; +} + +bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp) +{ + return __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME, + addr, gfp); +} + +bool nl80211_unexpected_4addr_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) +{ + return __nl80211_unexpected_frame(dev, + NL80211_CMD_UNEXPECTED_4ADDR_FRAME, + addr, gfp); +} + int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, struct net_device *netdev, u32 nlpid, int freq, const u8 *buf, size_t len, gfp_t gfp) { struct sk_buff *msg; void *hdr; - int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) @@ -6932,16 +7600,9 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, freq); NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf); - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return err; - } + genlmsg_end(msg, hdr); - err = genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); - if (err < 0) - return err; - return 0; + return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); nla_put_failure: genlmsg_cancel(msg, hdr); @@ -6974,10 +7635,7 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, if (ack) NLA_PUT_FLAG(msg, NL80211_ATTR_ACK); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); return; @@ -7019,10 +7677,7 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, nla_nest_end(msg, pinfoattr); - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -7064,11 +7719,51 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, nla_nest_end(msg, rekey_attr); - if (genlmsg_end(msg, hdr) < 0) { + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, int index, + const u8 *bssid, bool preauth, gfp_t gfp) +{ + struct sk_buff *msg; + struct nlattr *attr; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PMKSA_CANDIDATE); + if (!hdr) { nlmsg_free(msg); return; } + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + + attr = nla_nest_start(msg, NL80211_ATTR_PMKSA_CANDIDATE); + if (!attr) + goto nla_put_failure; + + NLA_PUT_U32(msg, NL80211_PMKSA_CANDIDATE_INDEX, index); + NLA_PUT(msg, NL80211_PMKSA_CANDIDATE_BSSID, ETH_ALEN, bssid); + if (preauth) + NLA_PUT_FLAG(msg, NL80211_PMKSA_CANDIDATE_PREAUTH); + + nla_nest_end(msg, attr); + + genlmsg_end(msg, hdr); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); return; @@ -7109,7 +7804,45 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, nla_nest_end(msg, pinfoattr); - if (genlmsg_end(msg, hdr) < 0) { + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void cfg80211_probe_status(struct net_device *dev, const u8 *addr, + u64 cookie, bool acked, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PROBE_CLIENT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr); + NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); + if (acked) + NLA_PUT_FLAG(msg, NL80211_ATTR_ACK); + + err = genlmsg_end(msg, hdr); + if (err < 0) { nlmsg_free(msg); return; } @@ -7122,6 +7855,45 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_probe_status); + +void cfg80211_report_obss_beacon(struct wiphy *wiphy, + const u8 *frame, size_t len, + int freq, gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct sk_buff *msg; + void *hdr; + u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid); + + if (!nlpid) + return; + + msg = nlmsg_new(len + 100, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + if (freq) + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, freq); + NLA_PUT(msg, NL80211_ATTR_FRAME, len, frame); + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_report_obss_beacon); static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, @@ -7136,9 +7908,12 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_lock(); - list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) + list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) cfg80211_mlme_unregister_socket(wdev, notify->pid); + if (rdev->ap_beacons_nlpid == notify->pid) + rdev->ap_beacons_nlpid = 0; + } rcu_read_unlock(); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 5d69c56400ae..12bf4d185abe 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -113,4 +113,13 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp); +void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, int index, + const u8 *bssid, bool preauth, gfp_t gfp); + +bool nl80211_unexpected_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp); +bool nl80211_unexpected_4addr_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp); + #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index dbe35e138e94..c4ad7958af52 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -15,6 +15,7 @@ */ #include <linux/kernel.h> +#include <linux/export.h> #include <net/cfg80211.h> #include <net/ieee80211_radiotap.h> #include <asm/unaligned.h> diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 68a471ba193f..f65feaad155f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2,13 +2,22 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> - * Copyright 2008 Luis R. Rodriguez <lrodriguz@atheros.com> + * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com> * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + /** * DOC: Wireless regulatory infrastructure * @@ -36,12 +45,14 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/random.h> #include <linux/ctype.h> #include <linux/nl80211.h> #include <linux/platform_device.h> +#include <linux/moduleparam.h> #include <net/cfg80211.h> #include "core.h" #include "reg.h" @@ -49,16 +60,23 @@ #include "nl80211.h" #ifdef CONFIG_CFG80211_REG_DEBUG -#define REG_DBG_PRINT(format, args...) \ - do { \ - printk(KERN_DEBUG pr_fmt(format), ##args); \ - } while (0) +#define REG_DBG_PRINT(format, args...) \ + printk(KERN_DEBUG pr_fmt(format), ##args) #else #define REG_DBG_PRINT(args...) #endif +static struct regulatory_request core_request_world = { + .initiator = NL80211_REGDOM_SET_BY_CORE, + .alpha2[0] = '0', + .alpha2[1] = '0', + .intersect = false, + .processed = true, + .country_ie_env = ENVIRON_ANY, +}; + /* Receipt of information from last regulatory request */ -static struct regulatory_request *last_request; +static struct regulatory_request *last_request = &core_request_world; /* To trigger userspace events */ static struct platform_device *reg_pdev; @@ -150,7 +168,7 @@ static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -static void reset_regdomains(void) +static void reset_regdomains(bool full_reset) { /* avoid freeing static information or freeing something twice */ if (cfg80211_regdomain == cfg80211_world_regdom) @@ -165,6 +183,13 @@ static void reset_regdomains(void) cfg80211_world_regdom = &world_regdom; cfg80211_regdomain = NULL; + + if (!full_reset) + return; + + if (last_request != &core_request_world) + kfree(last_request); + last_request = &core_request_world; } /* @@ -175,7 +200,7 @@ static void update_world_regdomain(const struct ieee80211_regdomain *rd) { BUG_ON(!last_request); - reset_regdomains(); + reset_regdomains(false); cfg80211_world_regdom = rd; cfg80211_regdomain = rd; @@ -753,9 +778,10 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan, chan->center_freq, KHZ_TO_MHZ(desired_bw_khz)); - REG_DBG_PRINT("%d KHz - %d KHz @ KHz), (%s mBi, %d mBm)\n", + REG_DBG_PRINT("%d KHz - %d KHz @ %d KHz), (%s mBi, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, max_antenna_gain, power_rule->max_eirp); } @@ -856,10 +882,22 @@ static void handle_channel(struct wiphy *wiphy, chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = min(chan->orig_mag, (int) MBI_TO_DBI(power_rule->max_antenna_gain)); - if (chan->orig_mpwr) - chan->max_power = min(chan->orig_mpwr, - (int) MBM_TO_DBM(power_rule->max_eirp)); - else + if (chan->orig_mpwr) { + /* + * Devices that have their own custom regulatory domain + * but also use WIPHY_FLAG_STRICT_REGULATORY will follow the + * passed country IE power settings. + */ + if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && + wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { + chan->max_power = + MBM_TO_DBM(power_rule->max_eirp); + } else { + chan->max_power = min(chan->orig_mpwr, + (int) MBM_TO_DBM(power_rule->max_eirp)); + } + } else chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } @@ -891,7 +929,7 @@ static bool ignore_reg_update(struct wiphy *wiphy, wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { REG_DBG_PRINT("Ignoring regulatory request %s " "since the driver uses its own custom " - "regulatory domain ", + "regulatory domain\n", reg_initiator_name(initiator)); return true; } @@ -905,7 +943,7 @@ static bool ignore_reg_update(struct wiphy *wiphy, !is_world_regdom(last_request->alpha2)) { REG_DBG_PRINT("Ignoring regulatory request %s " "since the driver requires its own regulatory " - "domain to be set first", + "domain to be set first\n", reg_initiator_name(initiator)); return true; } @@ -913,14 +951,6 @@ static bool ignore_reg_update(struct wiphy *wiphy, return false; } -static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) -{ - struct cfg80211_registered_device *rdev; - - list_for_each_entry(rdev, &cfg80211_rdev_list, list) - wiphy_update_regulatory(&rdev->wiphy, initiator); -} - static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct reg_beacon *reg_beacon) @@ -1120,14 +1150,18 @@ static void reg_process_ht_flags(struct wiphy *wiphy) } -void wiphy_update_regulatory(struct wiphy *wiphy, - enum nl80211_reg_initiator initiator) +static void wiphy_update_regulatory(struct wiphy *wiphy, + enum nl80211_reg_initiator initiator) { enum ieee80211_band band; + assert_reg_lock(); + if (ignore_reg_update(wiphy, initiator)) return; + last_request->dfs_region = cfg80211_regdomain->dfs_region; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) handle_band(wiphy, band, initiator); @@ -1139,6 +1173,34 @@ void wiphy_update_regulatory(struct wiphy *wiphy, wiphy->reg_notifier(wiphy, last_request); } +void regulatory_update(struct wiphy *wiphy, + enum nl80211_reg_initiator setby) +{ + mutex_lock(®_mutex); + wiphy_update_regulatory(wiphy, setby); + mutex_unlock(®_mutex); +} + +static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) +{ + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + wiphy = &rdev->wiphy; + wiphy_update_regulatory(wiphy, initiator); + /* + * Regulatory updates set by CORE are ignored for custom + * regulatory cards. Let us notify the changes to the driver, + * as some drivers used this to restore its orig_* reg domain. + */ + if (initiator == NL80211_REGDOM_SET_BY_CORE && + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && + wiphy->reg_notifier) + wiphy->reg_notifier(wiphy, last_request); + } +} + static void handle_channel_custom(struct wiphy *wiphy, enum ieee80211_band band, unsigned int chan_idx, @@ -1396,7 +1458,8 @@ static int __regulatory_hint(struct wiphy *wiphy, } new_request: - kfree(last_request); + if (last_request != &core_request_world) + kfree(last_request); last_request = pending_request; last_request->intersect = intersect; @@ -1426,18 +1489,18 @@ new_request: } /* This processes *all* regulatory hints */ -static void reg_process_hint(struct regulatory_request *reg_request) +static void reg_process_hint(struct regulatory_request *reg_request, + enum nl80211_reg_initiator reg_initiator) { int r = 0; struct wiphy *wiphy = NULL; - enum nl80211_reg_initiator initiator = reg_request->initiator; BUG_ON(!reg_request->alpha2); if (wiphy_idx_valid(reg_request->wiphy_idx)) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && + if (reg_initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); return; @@ -1447,7 +1510,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) /* This is required so that the orig_* parameters are saved */ if (r == -EALREADY && wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { - wiphy_update_regulatory(wiphy, initiator); + wiphy_update_regulatory(wiphy, reg_initiator); return; } @@ -1456,7 +1519,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) * source of bogus requests. */ if (r != -EALREADY && - reg_request->initiator == NL80211_REGDOM_SET_BY_USER) + reg_initiator == NL80211_REGDOM_SET_BY_USER) schedule_delayed_work(®_timeout, msecs_to_jiffies(3142)); } @@ -1475,7 +1538,7 @@ static void reg_process_pending_hints(void) /* When last_request->processed becomes true this will be rescheduled */ if (last_request && !last_request->processed) { REG_DBG_PRINT("Pending regulatory request, waiting " - "for it to be processed..."); + "for it to be processed...\n"); goto out; } @@ -1493,7 +1556,7 @@ static void reg_process_pending_hints(void) spin_unlock(®_requests_lock); - reg_process_hint(reg_request); + reg_process_hint(reg_request, reg_request->initiator); out: mutex_unlock(®_mutex); @@ -1566,9 +1629,6 @@ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - kfree(last_request); - last_request = NULL; - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) @@ -1741,6 +1801,26 @@ static void restore_alpha2(char *alpha2, bool reset_user) REG_DBG_PRINT("Restoring regulatory settings\n"); } +static void restore_custom_reg_settings(struct wiphy *wiphy) +{ + struct ieee80211_supported_band *sband; + enum ieee80211_band band; + struct ieee80211_channel *chan; + int i; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + sband = wiphy->bands[band]; + if (!sband) + continue; + for (i = 0; i < sband->n_channels; i++) { + chan = &sband->channels[i]; + chan->flags = chan->orig_flags; + chan->max_antenna_gain = chan->orig_mag; + chan->max_power = chan->orig_mpwr; + } + } +} + /* * Restoring regulatory settings involves ingoring any * possibly stale country IE information and user regulatory @@ -1759,14 +1839,16 @@ static void restore_alpha2(char *alpha2, bool reset_user) static void restore_regulatory_settings(bool reset_user) { char alpha2[2]; + char world_alpha2[2]; struct reg_beacon *reg_beacon, *btmp; struct regulatory_request *reg_request, *tmp; LIST_HEAD(tmp_reg_req_list); + struct cfg80211_registered_device *rdev; mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); - reset_regdomains(); + reset_regdomains(true); restore_alpha2(alpha2, reset_user); /* @@ -1809,11 +1891,18 @@ static void restore_regulatory_settings(bool reset_user) /* First restore to the basic regulatory settings */ cfg80211_regdomain = cfg80211_world_regdom; + world_alpha2[0] = cfg80211_regdomain->alpha2[0]; + world_alpha2[1] = cfg80211_regdomain->alpha2[1]; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (rdev->wiphy.flags & WIPHY_FLAG_CUSTOM_REGULATORY) + restore_custom_reg_settings(&rdev->wiphy); + } mutex_unlock(®_mutex); mutex_unlock(&cfg80211_mutex); - regulatory_hint_core(cfg80211_regdomain->alpha2); + regulatory_hint_core(world_alpha2); /* * This restores the ieee80211_regdom module parameter @@ -1910,7 +1999,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) const struct ieee80211_freq_range *freq_range = NULL; const struct ieee80211_power_rule *power_rule = NULL; - pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n"); + pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n"); for (i = 0; i < rd->n_reg_rules; i++) { reg_rule = &rd->reg_rules[i]; @@ -1922,14 +2011,14 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) * in certain regions */ if (power_rule->max_antenna_gain) - pr_info(" (%d KHz - %d KHz @ %d KHz), (%d mBi, %d mBm)\n", + pr_info(" (%d KHz - %d KHz @ %d KHz), (%d mBi, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, freq_range->max_bandwidth_khz, power_rule->max_antenna_gain, power_rule->max_eirp); else - pr_info(" (%d KHz - %d KHz @ %d KHz), (N/A, %d mBm)\n", + pr_info(" (%d KHz - %d KHz @ %d KHz), (N/A, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, freq_range->max_bandwidth_khz, @@ -1937,6 +2026,42 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) } } +bool reg_supported_dfs_region(u8 dfs_region) +{ + switch (dfs_region) { + case NL80211_DFS_UNSET: + case NL80211_DFS_FCC: + case NL80211_DFS_ETSI: + case NL80211_DFS_JP: + return true; + default: + REG_DBG_PRINT("Ignoring uknown DFS master region: %d\n", + dfs_region); + return false; + } +} + +static void print_dfs_region(u8 dfs_region) +{ + if (!dfs_region) + return; + + switch (dfs_region) { + case NL80211_DFS_FCC: + pr_info(" DFS Master region FCC"); + break; + case NL80211_DFS_ETSI: + pr_info(" DFS Master region ETSI"); + break; + case NL80211_DFS_JP: + pr_info(" DFS Master region JP"); + break; + default: + pr_info(" DFS Master region Uknown"); + break; + } +} + static void print_regdomain(const struct ieee80211_regdomain *rd) { @@ -1964,6 +2089,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) pr_info("Regulatory domain changed to country: %c%c\n", rd->alpha2[0], rd->alpha2[1]); } + print_dfs_region(rd->dfs_region); print_rd_rules(rd); } @@ -2026,12 +2152,18 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) } request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (!request_wiphy && + (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || + last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)) { + schedule_delayed_work(®_timeout, 0); + return -ENODEV; + } if (!last_request->intersect) { int r; if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) { - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = rd; return 0; } @@ -2052,7 +2184,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (r) return r; - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = rd; return 0; } @@ -2077,7 +2209,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) rd = NULL; - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = intersected_rd; return 0; @@ -2097,7 +2229,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) kfree(rd); rd = NULL; - reset_regdomains(); + reset_regdomains(false); cfg80211_regdomain = intersected_rd; return 0; @@ -2188,7 +2320,7 @@ out: static void reg_timeout_work(struct work_struct *work) { REG_DBG_PRINT("Timeout while waiting for CRDA to reply, " - "restoring regulatory settings"); + "restoring regulatory settings\n"); restore_regulatory_settings(true); } @@ -2250,9 +2382,9 @@ void /* __init_or_exit */ regulatory_exit(void) mutex_lock(&cfg80211_mutex); mutex_lock(®_mutex); - reset_regdomains(); + reset_regdomains(true); - kfree(last_request); + dev_set_uevent_suppress(®_pdev->dev, true); platform_device_unregister(reg_pdev); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index b67d1c3a2fb9..e2aaaf525a22 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -1,10 +1,26 @@ #ifndef __NET_WIRELESS_REG_H #define __NET_WIRELESS_REG_H +/* + * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ extern const struct ieee80211_regdomain *cfg80211_regdomain; bool is_world_regdom(const char *alpha2); bool reg_is_valid_request(const char *alpha2); +bool reg_supported_dfs_region(u8 dfs_region); int regulatory_hint_user(const char *alpha2); @@ -16,6 +32,8 @@ void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); +void regulatory_update(struct wiphy *wiphy, enum nl80211_reg_initiator setby); + /** * regulatory_hint_found_beacon - hints a beacon was found on a channel * @wiphy: the wireless device where the beacon was found on diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h index 818222c92513..3279cfcefb0c 100644 --- a/net/wireless/regdb.h +++ b/net/wireless/regdb.h @@ -1,6 +1,22 @@ #ifndef __REGDB_H__ #define __REGDB_H__ +/* + * Copyright 2009 John W. Linville <linville@tuxdriver.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + extern const struct ieee80211_regdomain *reg_regdb[]; extern int reg_regdb_size; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2936cb809152..31119e32e092 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -12,6 +12,7 @@ #include <linux/etherdevice.h> #include <net/arp.h> #include <net/cfg80211.h> +#include <net/cfg80211-wext.h> #include <net/iw_handler.h> #include "core.h" #include "nl80211.h" @@ -227,21 +228,51 @@ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) } EXPORT_SYMBOL(cfg80211_find_ie); +const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, + const u8 *ies, int len) +{ + struct ieee80211_vendor_ie *ie; + const u8 *pos = ies, *end = ies + len; + int ie_oui; + + while (pos < end) { + pos = cfg80211_find_ie(WLAN_EID_VENDOR_SPECIFIC, pos, + end - pos); + if (!pos) + return NULL; + + if (end - pos < sizeof(*ie)) + return NULL; + + ie = (struct ieee80211_vendor_ie *)pos; + ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2]; + if (ie_oui == oui && ie->oui_type == oui_type) + return pos; + + pos += 2 + ie->len; + } + return NULL; +} +EXPORT_SYMBOL(cfg80211_find_vendor_ie); + static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2) { const u8 *ie1 = cfg80211_find_ie(num, ies1, len1); const u8 *ie2 = cfg80211_find_ie(num, ies2, len2); - int r; + /* equal if both missing */ if (!ie1 && !ie2) return 0; - if (!ie1 || !ie2) + /* sort missing IE before (left of) present IE */ + if (!ie1) return -1; + if (!ie2) + return 1; - r = memcmp(ie1 + 2, ie2 + 2, min(ie1[1], ie2[1])); - if (r == 0 && ie1[1] != ie2[1]) + /* sort by length first, then by contents */ + if (ie1[1] != ie2[1]) return ie2[1] - ie1[1]; - return r; + return memcmp(ie1 + 2, ie2 + 2, ie1[1]); } static bool is_bss(struct cfg80211_bss *a, @@ -324,8 +355,8 @@ static bool is_mesh(struct cfg80211_bss *a, sizeof(struct ieee80211_meshconf_ie) - 2) == 0; } -static int cmp_bss(struct cfg80211_bss *a, - struct cfg80211_bss *b) +static int cmp_bss_core(struct cfg80211_bss *a, + struct cfg80211_bss *b) { int r; @@ -347,7 +378,15 @@ static int cmp_bss(struct cfg80211_bss *a, b->len_information_elements); } - r = memcmp(a->bssid, b->bssid, ETH_ALEN); + return memcmp(a->bssid, b->bssid, ETH_ALEN); +} + +static int cmp_bss(struct cfg80211_bss *a, + struct cfg80211_bss *b) +{ + int r; + + r = cmp_bss_core(a, b); if (r) return r; @@ -358,6 +397,52 @@ static int cmp_bss(struct cfg80211_bss *a, b->len_information_elements); } +static int cmp_hidden_bss(struct cfg80211_bss *a, + struct cfg80211_bss *b) +{ + const u8 *ie1; + const u8 *ie2; + int i; + int r; + + r = cmp_bss_core(a, b); + if (r) + return r; + + ie1 = cfg80211_find_ie(WLAN_EID_SSID, + a->information_elements, + a->len_information_elements); + ie2 = cfg80211_find_ie(WLAN_EID_SSID, + b->information_elements, + b->len_information_elements); + + /* Key comparator must use same algorithm in any rb-tree + * search function (order is important), otherwise ordering + * of items in the tree is broken and search gives incorrect + * results. This code uses same order as cmp_ies() does. */ + + /* sort missing IE before (left of) present IE */ + if (!ie1) + return -1; + if (!ie2) + return 1; + + /* zero-size SSID is used as an indication of the hidden bss */ + if (!ie2[1]) + return 0; + + /* sort by length first, then by contents */ + if (ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + + /* zeroed SSID ie is another indication of a hidden bss */ + for (i = 0; i < ie2[1]; i++) + if (ie2[i + 2]) + return -1; + + return 0; +} + struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, @@ -474,6 +559,48 @@ rb_find_bss(struct cfg80211_registered_device *dev, } static struct cfg80211_internal_bss * +rb_find_hidden_bss(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *res) +{ + struct rb_node *n = dev->bss_tree.rb_node; + struct cfg80211_internal_bss *bss; + int r; + + while (n) { + bss = rb_entry(n, struct cfg80211_internal_bss, rbn); + r = cmp_hidden_bss(&res->pub, &bss->pub); + + if (r == 0) + return bss; + else if (r < 0) + n = n->rb_left; + else + n = n->rb_right; + } + + return NULL; +} + +static void +copy_hidden_ies(struct cfg80211_internal_bss *res, + struct cfg80211_internal_bss *hidden) +{ + if (unlikely(res->pub.beacon_ies)) + return; + if (WARN_ON(!hidden->pub.beacon_ies)) + return; + + res->pub.beacon_ies = kmalloc(hidden->pub.len_beacon_ies, GFP_ATOMIC); + if (unlikely(!res->pub.beacon_ies)) + return; + + res->beacon_ies_allocated = true; + res->pub.len_beacon_ies = hidden->pub.len_beacon_ies; + memcpy(res->pub.beacon_ies, hidden->pub.beacon_ies, + res->pub.len_beacon_ies); +} + +static struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *res) { @@ -576,6 +703,21 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, kref_put(&res->ref, bss_release); } else { + struct cfg80211_internal_bss *hidden; + + /* First check if the beacon is a probe response from + * a hidden bss. If so, copy beacon ies (with nullified + * ssid) into the probe response bss entry (with real ssid). + * It is required basically for PSM implementation + * (probe responses do not contain tim ie) */ + + /* TODO: The code is not trying to update existing probe + * response bss entries when beacon ies are + * getting changed. */ + hidden = rb_find_hidden_bss(dev, res); + if (hidden) + copy_hidden_ies(res, hidden); + /* this "consumes" the reference */ list_add_tail(&res->list, &dev->bss_list); rb_insert_bss(dev, res); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index dec0fa28372e..7b9ecaed96be 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/wireless.h> +#include <linux/export.h> #include <net/iw_handler.h> #include <net/cfg80211.h> #include <net/rtnetlink.h> @@ -110,17 +111,22 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) else { int i = 0, j; enum ieee80211_band band; + struct ieee80211_supported_band *bands; + struct ieee80211_channel *channel; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (!wdev->wiphy->bands[band]) + bands = wdev->wiphy->bands[band]; + if (!bands) continue; - for (j = 0; j < wdev->wiphy->bands[band]->n_channels; - i++, j++) - request->channels[i] = - &wdev->wiphy->bands[band]->channels[j]; - request->rates[band] = - (1 << wdev->wiphy->bands[band]->n_bitrates) - 1; + for (j = 0; j < bands->n_channels; j++) { + channel = &bands->channels[j]; + if (channel->flags & IEEE80211_CHAN_DISABLED) + continue; + request->channels[i++] = channel; + } + request->rates[band] = (1 << bands->n_bitrates) - 1; } + n_channels = i; } request->n_channels = n_channels; request->ssids = (void *)&request->channels[n_channels]; @@ -184,7 +190,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) prev_bssid, params->ssid, params->ssid_len, params->ie, params->ie_len, - false, ¶ms->crypto); + false, ¶ms->crypto, + params->flags, ¶ms->ht_capa, + ¶ms->ht_capa_mask); if (err) __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, @@ -545,45 +553,35 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, EXPORT_SYMBOL(cfg80211_connect_result); void __cfg80211_roamed(struct wireless_dev *wdev, - struct ieee80211_channel *channel, - const u8 *bssid, + struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len) { - struct cfg80211_bss *bss; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif - ASSERT_WDEV_LOCK(wdev); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) - return; + goto out; if (wdev->sme_state != CFG80211_SME_CONNECTED) - return; + goto out; /* internal error -- how did we get to CONNECTED w/o BSS? */ if (WARN_ON(!wdev->current_bss)) { - return; + goto out; } cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(&wdev->current_bss->pub); wdev->current_bss = NULL; - bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, - wdev->ssid, wdev->ssid_len, - WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - - if (WARN_ON(!bss)) - return; - cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bssid, + nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bss->bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, GFP_KERNEL); @@ -604,11 +602,15 @@ void __cfg80211_roamed(struct wireless_dev *wdev, memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; - memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); - memcpy(wdev->wext.prev_bssid, bssid, ETH_ALEN); + memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN); + memcpy(wdev->wext.prev_bssid, bss->bssid, ETH_ALEN); wdev->wext.prev_bssid_valid = true; wireless_send_event(wdev->netdev, SIOCGIWAP, &wrqu, NULL); #endif + + return; +out: + cfg80211_put_bss(bss); } void cfg80211_roamed(struct net_device *dev, @@ -618,32 +620,57 @@ void cfg80211_roamed(struct net_device *dev, const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_bss *bss; + + CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); + + bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid, + wdev->ssid_len, WLAN_CAPABILITY_ESS, + WLAN_CAPABILITY_ESS); + if (WARN_ON(!bss)) + return; + + cfg80211_roamed_bss(dev, bss, req_ie, req_ie_len, resp_ie, + resp_ie_len, gfp); +} +EXPORT_SYMBOL(cfg80211_roamed); + +void cfg80211_roamed_bss(struct net_device *dev, + struct cfg80211_bss *bss, const u8 *req_ie, + size_t req_ie_len, const u8 *resp_ie, + size_t resp_ie_len, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); + if (WARN_ON(!bss)) + return; + ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); - if (!ev) + if (!ev) { + cfg80211_put_bss(bss); return; + } ev->type = EVENT_ROAMED; - ev->rm.channel = channel; - memcpy(ev->rm.bssid, bssid, ETH_ALEN); ev->rm.req_ie = ((u8 *)ev) + sizeof(*ev); ev->rm.req_ie_len = req_ie_len; memcpy((void *)ev->rm.req_ie, req_ie, req_ie_len); ev->rm.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len; ev->rm.resp_ie_len = resp_ie_len; memcpy((void *)ev->rm.resp_ie, resp_ie, resp_ie_len); + ev->rm.bss = bss; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); queue_work(cfg80211_wq, &rdev->event_work); } -EXPORT_SYMBOL(cfg80211_roamed); +EXPORT_SYMBOL(cfg80211_roamed_bss); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) @@ -768,6 +795,9 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, wdev->connect_keys = NULL; } + cfg80211_oper_and_ht_capa(&connect->ht_capa_mask, + rdev->wiphy.ht_capa_mod_mask); + if (connkeys && connkeys->def >= 0) { int idx; u32 cipher; diff --git a/net/wireless/util.c b/net/wireless/util.c index be75a3a0424e..9aa9db6c8141 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -3,11 +3,13 @@ * * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net> */ +#include <linux/export.h> #include <linux/bitops.h> #include <linux/etherdevice.h> #include <linux/slab.h> #include <net/cfg80211.h> #include <net/ip.h> +#include <net/dsfield.h> #include "core.h" struct ieee80211_rate * @@ -150,12 +152,19 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy) set_mandatory_flags_band(wiphy->bands[band], band); } +bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher) +{ + int i; + for (i = 0; i < wiphy->n_cipher_suites; i++) + if (cipher == wiphy->cipher_suites[i]) + return true; + return false; +} + int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr) { - int i; - if (key_idx > 5) return -EINVAL; @@ -225,26 +234,12 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, } } - for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) - if (params->cipher == rdev->wiphy.cipher_suites[i]) - break; - if (i == rdev->wiphy.n_cipher_suites) + if (!cfg80211_supported_cipher_suite(&rdev->wiphy, params->cipher)) return -EINVAL; return 0; } -/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ -/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -const unsigned char rfc1042_header[] __aligned(2) = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; -EXPORT_SYMBOL(rfc1042_header); - -/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -const unsigned char bridge_tunnel_header[] __aligned(2) = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; -EXPORT_SYMBOL(bridge_tunnel_header); - unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc) { unsigned int hdrlen = 24; @@ -391,8 +386,9 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, } break; case cpu_to_le16(0): - if (iftype != NL80211_IFTYPE_ADHOC) - return -1; + if (iftype != NL80211_IFTYPE_ADHOC && + iftype != NL80211_IFTYPE_STATION) + return -1; break; } @@ -512,10 +508,9 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, if (head_need) skb_orphan(skb); - if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) { - pr_err("failed to reallocate Tx buffer\n"); + if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) return -ENOMEM; - } + skb->truesize += head_need; } @@ -656,7 +651,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb) switch (skb->protocol) { case htons(ETH_P_IP): - dscp = ip_hdr(skb)->tos & 0xfc; + dscp = ipv4_get_dsfield(ip_hdr(skb)) & 0xfc; + break; + case htons(ETH_P_IPV6): + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & 0xfc; break; default: return 0; @@ -746,9 +744,9 @@ static void cfg80211_process_wdev_events(struct wireless_dev *wdev) NULL); break; case EVENT_ROAMED: - __cfg80211_roamed(wdev, ev->rm.channel, ev->rm.bssid, - ev->rm.req_ie, ev->rm.req_ie_len, - ev->rm.resp_ie, ev->rm.resp_ie_len); + __cfg80211_roamed(wdev, ev->rm.bss, ev->rm.req_ie, + ev->rm.req_ie_len, ev->rm.resp_ie, + ev->rm.resp_ie_len); break; case EVENT_DISCONNECTED: __cfg80211_disconnected(wdev->netdev, @@ -1044,3 +1042,14 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, return 0; } + +/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ +/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ +const unsigned char rfc1042_header[] __aligned(2) = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; +EXPORT_SYMBOL(rfc1042_header); + +/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ +const unsigned char bridge_tunnel_header[] __aligned(2) = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; +EXPORT_SYMBOL(bridge_tunnel_header); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 0bf169bb770e..3c24eb97e9d7 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -8,6 +8,7 @@ * Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net> */ +#include <linux/export.h> #include <linux/wireless.h> #include <linux/nl80211.h> #include <linux/if_arp.h> @@ -15,6 +16,7 @@ #include <linux/slab.h> #include <net/iw_handler.h> #include <net/cfg80211.h> +#include <net/cfg80211-wext.h> #include "wext-compat.h" #include "core.h" @@ -363,9 +365,9 @@ int cfg80211_wext_giwfrag(struct net_device *dev, } EXPORT_SYMBOL_GPL(cfg80211_wext_giwfrag); -int cfg80211_wext_siwretry(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *retry, char *extra) +static int cfg80211_wext_siwretry(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *retry, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -402,7 +404,6 @@ int cfg80211_wext_siwretry(struct net_device *dev, return err; } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwretry); int cfg80211_wext_giwretry(struct net_device *dev, struct iw_request_info *info, @@ -593,9 +594,9 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return err; } -int cfg80211_wext_siwencode(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *erq, char *keybuf) +static int cfg80211_wext_siwencode(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *erq, char *keybuf) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -652,11 +653,10 @@ int cfg80211_wext_siwencode(struct net_device *dev, wdev->wext.default_key == -1, idx, ¶ms); } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwencode); -int cfg80211_wext_siwencodeext(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *erq, char *extra) +static int cfg80211_wext_siwencodeext(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *erq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -744,11 +744,10 @@ int cfg80211_wext_siwencodeext(struct net_device *dev, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, idx, ¶ms); } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwencodeext); -int cfg80211_wext_giwencode(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *erq, char *keybuf) +static int cfg80211_wext_giwencode(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *erq, char *keybuf) { struct wireless_dev *wdev = dev->ieee80211_ptr; int idx; @@ -782,11 +781,10 @@ int cfg80211_wext_giwencode(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwencode); -int cfg80211_wext_siwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *wextfreq, char *extra) +static int cfg80211_wext_siwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -815,19 +813,30 @@ int cfg80211_wext_siwfreq(struct net_device *dev, return -EOPNOTSUPP; } } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwfreq); -int cfg80211_wext_giwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *freq, char *extra) +static int cfg80211_wext_giwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct ieee80211_channel *chan; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); + case NL80211_IFTYPE_MONITOR: + if (!rdev->ops->get_channel) + return -EINVAL; + + chan = rdev->ops->get_channel(wdev->wiphy); + if (!chan) + return -EINVAL; + freq->m = chan->center_freq; + freq->e = 6; + return 0; default: if (!wdev->channel) return -EINVAL; @@ -836,11 +845,10 @@ int cfg80211_wext_giwfreq(struct net_device *dev, return 0; } } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwfreq); -int cfg80211_wext_siwtxpower(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *data, char *extra) +static int cfg80211_wext_siwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -889,11 +897,10 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, return rdev->ops->set_tx_power(wdev->wiphy, type, DBM_TO_MBM(dbm)); } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower); -int cfg80211_wext_giwtxpower(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *data, char *extra) +static int cfg80211_wext_giwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -919,7 +926,6 @@ int cfg80211_wext_giwtxpower(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwtxpower); static int cfg80211_set_auth_alg(struct wireless_dev *wdev, s32 auth_alg) @@ -1070,9 +1076,9 @@ static int cfg80211_set_key_mgt(struct wireless_dev *wdev, u32 key_mgt) return 0; } -int cfg80211_wext_siwauth(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *data, char *extra) +static int cfg80211_wext_siwauth(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -1102,21 +1108,19 @@ int cfg80211_wext_siwauth(struct net_device *dev, return -EOPNOTSUPP; } } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwauth); -int cfg80211_wext_giwauth(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *data, char *extra) +static int cfg80211_wext_giwauth(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *data, char *extra) { /* XXX: what do we need? */ return -EOPNOTSUPP; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwauth); -int cfg80211_wext_siwpower(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *wrq, char *extra) +static int cfg80211_wext_siwpower(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *wrq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -1160,11 +1164,10 @@ int cfg80211_wext_siwpower(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwpower); -int cfg80211_wext_giwpower(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *wrq, char *extra) +static int cfg80211_wext_giwpower(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *wrq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -1172,7 +1175,6 @@ int cfg80211_wext_giwpower(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwpower); static int cfg80211_wds_wext_siwap(struct net_device *dev, struct iw_request_info *info, @@ -1218,9 +1220,9 @@ static int cfg80211_wds_wext_giwap(struct net_device *dev, return 0; } -int cfg80211_wext_siwrate(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *rate, char *extra) +static int cfg80211_wext_siwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -1268,11 +1270,10 @@ int cfg80211_wext_siwrate(struct net_device *dev, return rdev->ops->set_bitrate_mask(wdev->wiphy, dev, NULL, &mask); } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwrate); -int cfg80211_wext_giwrate(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *rate, char *extra) +static int cfg80211_wext_giwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -1308,10 +1309,9 @@ int cfg80211_wext_giwrate(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwrate); /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ -struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) +static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -1376,11 +1376,10 @@ struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) return &wstats; } -EXPORT_SYMBOL_GPL(cfg80211_wireless_stats); -int cfg80211_wext_siwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra) +static int cfg80211_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -1395,11 +1394,10 @@ int cfg80211_wext_siwap(struct net_device *dev, return -EOPNOTSUPP; } } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwap); -int cfg80211_wext_giwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra) +static int cfg80211_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -1414,11 +1412,10 @@ int cfg80211_wext_giwap(struct net_device *dev, return -EOPNOTSUPP; } } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwap); -int cfg80211_wext_siwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid) +static int cfg80211_wext_siwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -1431,11 +1428,10 @@ int cfg80211_wext_siwessid(struct net_device *dev, return -EOPNOTSUPP; } } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwessid); -int cfg80211_wext_giwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid) +static int cfg80211_wext_giwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -1451,11 +1447,10 @@ int cfg80211_wext_giwessid(struct net_device *dev, return -EOPNOTSUPP; } } -EXPORT_SYMBOL_GPL(cfg80211_wext_giwessid); -int cfg80211_wext_siwpmksa(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *extra) +static int cfg80211_wext_siwpmksa(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -1493,7 +1488,6 @@ int cfg80211_wext_siwpmksa(struct net_device *dev, return -EOPNOTSUPP; } } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwpmksa); static const iw_handler cfg80211_handlers[] = { [IW_IOCTL_IDX(SIOCGIWNAME)] = (iw_handler) cfg80211_wext_giwname, diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h index 20b3daef6964..5d766b0118e8 100644 --- a/net/wireless/wext-compat.h +++ b/net/wireless/wext-compat.h @@ -42,6 +42,14 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid); +int cfg80211_wext_siwmlme(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra); +int cfg80211_wext_siwgenie(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra); + + int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index fdbc23c10d8c..0af7f54e4f61 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/wireless.h> #include <linux/uaccess.h> +#include <linux/export.h> #include <net/cfg80211.h> #include <net/iw_handler.h> #include <net/netlink.h> diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 6fffe62d7c25..326750b99151 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -5,10 +5,12 @@ * Copyright (C) 2009 Intel Corporation. All rights reserved. */ +#include <linux/export.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/slab.h> #include <net/cfg80211.h> +#include <net/cfg80211-wext.h> #include "wext-compat.h" #include "nl80211.h" @@ -365,7 +367,6 @@ int cfg80211_wext_siwgenie(struct net_device *dev, wdev_unlock(wdev); return err; } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwgenie); int cfg80211_wext_siwmlme(struct net_device *dev, struct iw_request_info *info, @@ -402,4 +403,3 @@ int cfg80211_wext_siwmlme(struct net_device *dev, return err; } -EXPORT_SYMBOL_GPL(cfg80211_wext_siwmlme); diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c index 6dcfe65a2d1a..5d643a548feb 100644 --- a/net/wireless/wext-spy.c +++ b/net/wireless/wext-spy.c @@ -10,6 +10,7 @@ #include <linux/wireless.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/export.h> #include <net/iw_handler.h> #include <net/arp.h> #include <net/wext.h> diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5f03e4ea65bf..a306bc66000e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -232,7 +232,7 @@ static int x25_device_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; if (dev->type == ARPHRD_X25 -#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) +#if IS_ENABLED(CONFIG_LLC) || dev->type == ARPHRD_ETHER #endif ) { @@ -1261,14 +1261,19 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, struct x25_sock *x25 = x25_sk(sk); struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)msg->msg_name; size_t copied; - int qbit, header_len = x25->neighbour->extended ? - X25_EXT_MIN_LEN : X25_STD_MIN_LEN; - + int qbit, header_len; struct sk_buff *skb; unsigned char *asmptr; int rc = -ENOTCONN; lock_sock(sk); + + if (x25->neighbour == NULL) + goto out; + + header_len = x25->neighbour->extended ? + X25_EXT_MIN_LEN : X25_STD_MIN_LEN; + /* * This works for seqpacket too. The receiver has ordered the queue for * us! We do one quick check first though diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index fa2b41888bd9..f0ce862d1f46 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -161,7 +161,7 @@ void x25_establish_link(struct x25_neigh *nb) *ptr = X25_IFACE_CONNECT; break; -#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) +#if IS_ENABLED(CONFIG_LLC) case ARPHRD_ETHER: return; #endif @@ -180,7 +180,7 @@ void x25_terminate_link(struct x25_neigh *nb) struct sk_buff *skb; unsigned char *ptr; -#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) +#if IS_ENABLED(CONFIG_LLC) if (nb->dev->type == ARPHRD_ETHER) return; #endif @@ -213,7 +213,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb) *dptr = X25_IFACE_DATA; break; -#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) +#if IS_ENABLED(CONFIG_LLC) case ARPHRD_ETHER: kfree_skb(skb); return; diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 7ff373792324..2ffde4631ae2 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/export.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/x25.h> diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 97d77c532d8c..cf6366270054 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -134,7 +134,7 @@ struct net_device *x25_dev_get(char *devname) if (dev && (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25 -#if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) +#if IS_ENABLED(CONFIG_LLC) && dev->type != ARPHRD_ETHER #endif ))){ diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index fc91ad7ee26e..e5246fbe36c4 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -70,26 +70,29 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) while ((scratch += len, dlen -= len) > 0) { skb_frag_t *frag; + struct page *page; err = -EMSGSIZE; if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) goto out; frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; - frag->page = alloc_page(GFP_ATOMIC); + page = alloc_page(GFP_ATOMIC); err = -ENOMEM; - if (!frag->page) + if (!page) goto out; + __skb_frag_set_page(frag, page); + len = PAGE_SIZE; if (dlen < len) len = dlen; - memcpy(page_address(frag->page), scratch, len); - frag->page_offset = 0; - frag->size = len; + skb_frag_size_set(frag, len); + memcpy(skb_frag_address(frag), scratch, len); + skb->truesize += len; skb->data_len += len; skb->len += len; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 552df27dcf53..7661576b6f45 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -61,8 +61,8 @@ __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { const struct flowi4 *fl4 = &fl->u.ip4; - return addr_match(&fl4->daddr, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl4->saddr, &sel->saddr, sel->prefixlen_s) && + return addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) && + addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) && !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && (fl4->flowi4_proto == sel->proto || !sel->proto) && @@ -1340,7 +1340,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) case AF_INET: dst_ops = &net->xfrm.xfrm4_dst_ops; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: dst_ops = &net->xfrm.xfrm6_dst_ops; break; @@ -1499,7 +1499,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto free_dst; /* Copy neighbour for reachability confirmation */ - dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour(dst))); + dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour_noref(dst))); xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); xfrm_init_pmtu(dst_prev); @@ -2276,8 +2276,6 @@ static void __xfrm_garbage_collect(struct net *net) { struct dst_entry *head, *next; - flow_cache_flush(); - spin_lock_bh(&xfrm_policy_sk_bundle_lock); head = xfrm_policy_sk_bundles; xfrm_policy_sk_bundles = NULL; @@ -2290,6 +2288,18 @@ static void __xfrm_garbage_collect(struct net *net) } } +static void xfrm_garbage_collect(struct net *net) +{ + flow_cache_flush(); + __xfrm_garbage_collect(net); +} + +static void xfrm_garbage_collect_deferred(struct net *net) +{ + flow_cache_flush_deferred(); + __xfrm_garbage_collect(net); +} + static void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -2382,9 +2392,11 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst) return dst_metric_advmss(dst->path); } -static unsigned int xfrm_default_mtu(const struct dst_entry *dst) +static unsigned int xfrm_mtu(const struct dst_entry *dst) { - return dst_mtu(dst->path); + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst_mtu(dst->path); } static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr) @@ -2411,8 +2423,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->check = xfrm_dst_check; if (likely(dst_ops->default_advmss == NULL)) dst_ops->default_advmss = xfrm_default_advmss; - if (likely(dst_ops->default_mtu == NULL)) - dst_ops->default_mtu = xfrm_default_mtu; + if (likely(dst_ops->mtu == NULL)) + dst_ops->mtu = xfrm_mtu; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) @@ -2420,7 +2432,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) if (likely(dst_ops->neigh_lookup == NULL)) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(afinfo->garbage_collect == NULL)) - afinfo->garbage_collect = __xfrm_garbage_collect; + afinfo->garbage_collect = xfrm_garbage_collect_deferred; xfrm_policy_afinfo[afinfo->family] = afinfo; } write_unlock_bh(&xfrm_policy_afinfo_lock); @@ -2433,7 +2445,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) case AF_INET: xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; break; @@ -2483,7 +2495,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net) afinfo = xfrm_policy_afinfo[AF_INET]; if (afinfo) net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) afinfo = xfrm_policy_afinfo[AF_INET6]; if (afinfo) net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; @@ -2514,7 +2526,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void switch (event) { case NETDEV_DOWN: - __xfrm_garbage_collect(dev_net(dev)); + xfrm_garbage_collect(dev_net(dev)); } return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 58d9ae005597..d0a1af8ed584 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -12,6 +12,7 @@ */ #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/export.h> #include <net/snmp.h> #include <net/xfrm.h> diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index b11ea692bd7d..39e02c54ed26 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -18,6 +18,7 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ +#include <linux/export.h> #include <net/xfrm.h> u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) @@ -203,8 +204,6 @@ static int xfrm_replay_check_bmp(struct xfrm_state *x, if (!replay_esn->replay_window) return 0; - pos = (replay_esn->seq - 1) % replay_esn->replay_window; - if (unlikely(seq == 0)) goto err; @@ -216,19 +215,18 @@ static int xfrm_replay_check_bmp(struct xfrm_state *x, goto err; } - if (pos >= diff) { + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - if (replay_esn->bmp[nr] & (1U << bitnr)) - goto err_replay; - } else { + else bitnr = replay_esn->replay_window - (diff - pos); - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - if (replay_esn->bmp[nr] & (1U << bitnr)) - goto err_replay; - } + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + return 0; err_replay: @@ -259,39 +257,27 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] &= ~(1U << bitnr); } - - bitnr = (pos + diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); } else { nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; - - bitnr = (pos + diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); } + bitnr = (pos + diff) % replay_esn->replay_window; replay_esn->seq = seq; } else { diff = replay_esn->seq - seq; - if (pos >= diff) { + if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); - } else { + else bitnr = replay_esn->replay_window - (diff - pos); - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); - } } + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } @@ -390,8 +376,6 @@ static int xfrm_replay_check_esn(struct xfrm_state *x, if (!wsize) return 0; - pos = (replay_esn->seq - 1) % replay_esn->replay_window; - if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && (replay_esn->seq < replay_esn->replay_window - 1))) goto err; @@ -415,19 +399,18 @@ static int xfrm_replay_check_esn(struct xfrm_state *x, goto err; } - if (pos >= diff) { + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - if (replay_esn->bmp[nr] & (1U << bitnr)) - goto err_replay; - } else { + else bitnr = replay_esn->replay_window - (diff - pos); - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - if (replay_esn->bmp[nr] & (1U << bitnr)) - goto err_replay; - } + + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + return 0; err_replay: @@ -465,22 +448,13 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] &= ~(1U << bitnr); } - - bitnr = (pos + diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); } else { nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; - - bitnr = (pos + diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); } + bitnr = (pos + diff) % replay_esn->replay_window; replay_esn->seq = seq; if (unlikely(wrap > 0)) @@ -488,19 +462,16 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) } else { diff = replay_esn->seq - seq; - if (pos >= diff) { + if (pos >= diff) bitnr = (pos - diff) % replay_esn->replay_window; - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); - } else { + else bitnr = replay_esn->replay_window - (diff - pos); - nr = bitnr >> 5; - bitnr = bitnr & 0x1F; - replay_esn->bmp[nr] |= (1U << bitnr); - } } + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9414b9c5b1e4..5b228f97d4b3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1035,16 +1035,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, break; case AF_INET6: - ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, - (const struct in6_addr *)daddr); - ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, - (const struct in6_addr *)saddr); + *(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr; + *(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; - ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, - (const struct in6_addr *)saddr); - ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, - (const struct in6_addr *)daddr); + *(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr; + *(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr; break; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 71de86698efa..66b84fbf2746 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -28,7 +28,7 @@ #include <net/netlink.h> #include <net/ah.h> #include <asm/uaccess.h> -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) #include <linux/in6.h> #endif @@ -150,7 +150,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, break; case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) break; #else err = -EAFNOSUPPORT; @@ -201,7 +201,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: if (attrs[XFRMA_ALG_COMP] || @@ -1160,7 +1160,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) break; case AF_INET6: -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) break; #else return -EAFNOSUPPORT; @@ -1231,7 +1231,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) switch (ut[i].family) { case AF_INET: break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: break; #endif @@ -2604,7 +2604,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, return NULL; } break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (opt != IPV6_XFRM_POLICY) { *dir = -EOPNOTSUPP; @@ -2935,7 +2935,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) { struct net *net; list_for_each_entry(net, net_exit_list, exit_list) - rcu_assign_pointer(net->xfrm.nlsk, NULL); + RCU_INIT_POINTER(net->xfrm.nlsk, NULL); synchronize_net(); list_for_each_entry(net, net_exit_list, exit_list) netlink_kernel_release(net->xfrm.nlsk_stash); |